Merge tag 'mips-pull-2020-10-07' of https://gitlab.denx.de/u-boot/custodians/u-boot...
author Tom Rini <trini@konsulko.com>
Wed, 7 Oct 2020 21:25:25 +0000 (17:25 -0400)
committer Tom Rini <trini@konsulko.com>
Wed, 7 Oct 2020 21:25:25 +0000 (17:25 -0400)
- mips: octeon: add support for DDR4 memory controller
- mips: octeon: add support for DWC3 USB
- mips: octeon: add support for booting Linux

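The new bootoctlinux command (added below in arch/mips/mach-octeon/bootoctlinux.c) starts a
Linux ELF image on a selectable set of cores. A minimal usage sketch - the load address,
coremask value and kernel arguments are illustrative only and not taken from this merge:

    => tftpboot $loadaddr vmlinux
    => bootoctlinux $loadaddr coremask=0xf endbootargs console=ttyS0,115200 root=/dev/sda1
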
37 files changed:
arch/mips/dts/mrvl,cn73xx.dtsi
arch/mips/dts/mrvl,octeon-ebb7304.dts
arch/mips/mach-octeon/Makefile
arch/mips/mach-octeon/bootoctlinux.c [new file with mode: 0644]
arch/mips/mach-octeon/cache.c
arch/mips/mach-octeon/cpu.c
arch/mips/mach-octeon/cvmx-bootmem.c [new file with mode: 0644]
arch/mips/mach-octeon/cvmx-coremask.c [new file with mode: 0644]
arch/mips/mach-octeon/dram.c
arch/mips/mach-octeon/include/mach/bootoct_cmd.h [new file with mode: 0644]
arch/mips/mach-octeon/include/mach/cvmx-bootinfo.h [new file with mode: 0644]
arch/mips/mach-octeon/include/mach/cvmx-bootmem.h [new file with mode: 0644]
arch/mips/mach-octeon/include/mach/cvmx-coremask.h [new file with mode: 0644]
arch/mips/mach-octeon/include/mach/cvmx-fuse.h [new file with mode: 0644]
arch/mips/mach-octeon/include/mach/cvmx-regs.h [new file with mode: 0644]
arch/mips/mach-octeon/include/mach/cvmx/cvmx-lmcx-defs.h [new file with mode: 0644]
arch/mips/mach-octeon/include/mach/octeon-feature.h [new file with mode: 0644]
arch/mips/mach-octeon/include/mach/octeon-model.h [new file with mode: 0644]
arch/mips/mach-octeon/include/mach/octeon_ddr.h [new file with mode: 0644]
arch/mips/mach-octeon/include/mangle-port.h [new file with mode: 0644]
arch/mips/mach-octeon/lowlevel_init.S
board/Marvell/octeon_ebb7304/board.c
board/Marvell/octeon_ebb7304/board_ddr.h [new file with mode: 0644]
configs/octeon_ebb7304_defconfig
drivers/ram/Kconfig
drivers/ram/Makefile
drivers/ram/octeon/Kconfig [new file with mode: 0644]
drivers/ram/octeon/Makefile [new file with mode: 0644]
drivers/ram/octeon/dimm_spd_eeprom.c [new file with mode: 0644]
drivers/ram/octeon/octeon3_lmc.c [new file with mode: 0644]
drivers/ram/octeon/octeon_ddr.c [new file with mode: 0644]
drivers/usb/host/Kconfig
drivers/usb/host/Makefile
drivers/usb/host/dwc3-octeon-glue.c [new file with mode: 0644]
drivers/usb/host/xhci-dwc3.c
drivers/usb/host/xhci-ring.c
include/configs/octeon_common.h

diff --git a/arch/mips/dts/mrvl,cn73xx.dtsi b/arch/mips/dts/mrvl,cn73xx.dtsi
index f5ad4a6..40eb85e 100644
                                     <0x0300e 4>, <0x0300f 4>;
                };
 
+               l2c: l2c@1180080000000 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "cavium,octeon-7xxx-l2c";
+                       reg = <0x11800 0x80000000 0x0 0x01000000>;
+                       u-boot,dm-pre-reloc;
+               };
+
+               lmc: lmc@1180088000000 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "cavium,octeon-7xxx-ddr4";
+                       reg = <0x11800 0x88000000 0x0 0x02000000>; // 2 IFs
+                       u-boot,dm-pre-reloc;
+                       l2c-handle = <&l2c>;
+               };
+
                reset: reset@1180006001600 {
                        compatible = "mrvl,cn7xxx-rst";
                        reg = <0x11800 0x06001600 0x0 0x200>;
                        spi-max-frequency = <25000000>;
                        clocks = <&clk OCTEON_CLK_IO>;
                };
+
+               /* USB 0 */
+               usb0: uctl@1180068000000 {
+                       compatible = "cavium,octeon-7130-usb-uctl";
+                       reg = <0x11800 0x68000000 0x0 0x100>;
+                       ranges; /* Direct mapping */
+                       #address-cells = <2>;
+                       #size-cells = <2>;
+                       /* Only 100MHz allowed */
+                       refclk-frequency = <100000000>;
+                       /* Only "dlmc_ref_clk0" is supported for 73xx */
+                       refclk-type-ss = "dlmc_ref_clk0";
+                       /* Only "dlmc_ref_clk0" is supported for 73xx */
+                       refclk-type-hs = "dlmc_ref_clk0";
+
+                       /*
+                        * Power is specified by three parts:
+                        * 1) GPIO handle (must be &gpio)
+                        * 2) GPIO pin number
+                        * 3) Active high (0) or active low (1)
+                        */
+                       xhci@1680000000000 {
+                               compatible = "cavium,octeon-7130-xhci","synopsys,dwc3","snps,dwc3";
+                               reg = <0x16800 0x00000000 0x10 0x0>;
+                               interrupts = <0x68080 4>; /* UAHC_IMAN, level */
+                               maximum-speed = "super-speed";
+                               dr_mode = "host";
+                               snps,dis_u3_susphy_quirk;
+                               snps,dis_u2_susphy_quirk;
+                               snps,dis_enblslpm_quirk;
+                       };
+               };
+
+               /* USB 1 */
+               usb1: uctl@1180069000000 {
+                       compatible = "cavium,octeon-7130-usb-uctl";
+                       reg = <0x11800 0x69000000 0x0 0x100>;
+                       ranges; /* Direct mapping */
+                       #address-cells = <2>;
+                       #size-cells = <2>;
+                       /* 50MHz, 100MHz and 125MHz allowed */
+                       refclk-frequency = <100000000>;
+                       /* Either "dlmc_ref_clk0" or "dlmc_ref_clk1" */
+                       refclk-type-ss = "dlmc_ref_clk0";
+                       /* Either "dlmc_ref_clk0", "dlmc_ref_clk1" or "pll_ref_clk" */
+                       refclk-type-hs = "dlmc_ref_clk0";
+
+                       /*
+                        * Power is specified by three parts:
+                        * 1) GPIO handle (must be &gpio)
+                        * 2) GPIO pin number
+                        * 3) Active high (0) or active low (1)
+                        */
+                       xhci@1690000000000 {
+                               compatible = "cavium,octeon-7130-xhci","synopsys,dwc3","snps,dwc3";
+                               reg = <0x16900 0x00000000 0x10 0x0>;
+                               interrupts = <0x69080 4>; /* UAHC_IMAN, level */
+                               dr_mode = "host";
+                       };
+               };
        };
 };
diff --git a/arch/mips/dts/mrvl,octeon-ebb7304.dts b/arch/mips/dts/mrvl,octeon-ebb7304.dts
index 6b2e5e8..993b4f6 100644
                reg = <0>;
        };
 };
+
+/* USB 0 */
+&usb0 {
+       status = "okay";
+       /*
+        * Power is specified by three parts:
+        * 1) GPIO handle (must be &gpio)
+        * 2) GPIO pin number
+        * 3) Active high (0) or active low (1)
+        */
+       power = <&gpio 20 0>;
+};
+
+/* USB 1 */
+&usb1 {
+       status = "okay";
+       /*
+        * Power is specified by three parts:
+        * 1) GPIO handle (must be &gpio)
+        * 2) GPIO pin number
+        * 3) Active high (0) or active low (1)
+        */
+       power = <&gpio 21 0>;
+};
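
The power properties above use the three-cell format described in the cn73xx.dtsi comments:
GPIO controller phandle, pin number and polarity flag. As a purely illustrative variant (not
part of this merge), an active-low power enable on the same pin would read power = <&gpio 20 1>;.
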
diff --git a/arch/mips/mach-octeon/Makefile b/arch/mips/mach-octeon/Makefile
index 2e37ca5..3486aa9 100644
@@ -8,3 +8,6 @@ obj-y += cache.o
 obj-y += clock.o
 obj-y += cpu.o
 obj-y += dram.o
+obj-y += cvmx-coremask.o
+obj-y += cvmx-bootmem.o
+obj-y += bootoctlinux.o
diff --git a/arch/mips/mach-octeon/bootoctlinux.c b/arch/mips/mach-octeon/bootoctlinux.c
new file mode 100644
index 0000000..75d7e83
--- /dev/null
@@ -0,0 +1,661 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2020 Stefan Roese <sr@denx.de>
+ */
+
+#include <command.h>
+#include <config.h>
+#include <cpu_func.h>
+#include <dm.h>
+#include <elf.h>
+#include <env.h>
+#include <ram.h>
+
+#include <asm/io.h>
+#include <linux/compat.h>
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+
+#include <mach/cvmx-coremask.h>
+#include <mach/cvmx-bootinfo.h>
+#include <mach/cvmx-bootmem.h>
+#include <mach/cvmx-regs.h>
+#include <mach/cvmx-fuse.h>
+#include <mach/octeon-model.h>
+#include <mach/octeon-feature.h>
+#include <mach/bootoct_cmd.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+/* ToDo: Revisit these settings */
+#define OCTEON_RESERVED_LOW_MEM_SIZE           (512 * 1024)
+#define OCTEON_RESERVED_LOW_BOOT_MEM_SIZE      (1024 * 1024)
+#define BOOTLOADER_BOOTMEM_DESC_SPACE          (1024 * 1024)
+
+/* Default stack and heap sizes, in bytes */
+#define DEFAULT_STACK_SIZE                     (1 * 1024 * 1024)
+#define DEFAULT_HEAP_SIZE                      (3 * 1024 * 1024)
+
+/**
+ * NOTE: This must duplicate octeon_boot_descriptor_t in the toolchain
+ * octeon-app-init.h file.
+ */
+enum {
+       /* If set, core should do app-wide init, only one core per app will have
+        * this flag set.
+        */
+       BOOT_FLAG_INIT_CORE     = 1,
+       OCTEON_BL_FLAG_DEBUG    = 1 << 1,
+       OCTEON_BL_FLAG_NO_MAGIC = 1 << 2,
+       /* If set, use uart1 for console */
+       OCTEON_BL_FLAG_CONSOLE_UART1 = 1 << 3,
+       OCTEON_BL_FLAG_CONSOLE_PCI = 1 << 4,    /* If set, use PCI console */
+       /* Call exit on break on serial port */
+       OCTEON_BL_FLAG_BREAK    = 1 << 5,
+       /*
+        * Be sure to update OCTEON_APP_INIT_H_VERSION when new fields are added
+        * and to conditionalize the new flag's usage based on the version.
+        */
+} octeon_boot_descriptor_flag;
+
+/**
+ * NOTE: This must duplicate octeon_boot_descriptor_t in the toolchain
+ * octeon-app-init.h file.
+ */
+#ifndef OCTEON_CURRENT_DESC_VERSION
+# define OCTEON_CURRENT_DESC_VERSION   7
+#endif
+/**
+ * NOTE: This must duplicate octeon_boot_descriptor_t in the toolchain
+ * octeon-app-init.h file.
+ */
+/* Version 7 changes: Change names of deprecated fields */
+#ifndef OCTEON_ARGV_MAX_ARGS
+# define OCTEON_ARGV_MAX_ARGS          64
+#endif
+
+/**
+ * NOTE: This must duplicate octeon_boot_descriptor_t in the toolchain
+ * octeon-app-init.h file.
+ */
+#ifndef OCTEON_SERIAL_LEN
+# define OCTEON_SERIAL_LEN             20
+#endif
+
+/**
+ * Bootloader structure used to pass info to Octeon executive startup code.
+ * NOTE: all fields are deprecated except for:
+ *  * desc_version
+ *  * desc_size
+ *  * heap_base
+ *  * heap_end
+ *  * eclock_hz
+ *  * flags
+ *  * argc
+ *  * argv
+ *  * cvmx_desc_vaddr
+ *  * debugger_flags_base_addr
+ *
+ *  All other fields have been moved to the cvmx_descriptor, and the new
+ *  fields should be added there. They are left as placeholders in this
+ *  structure for binary compatibility.
+ *
+ * NOTE: This structure must match what is in the toolchain octeon-app-init.h
+ * file.
+ */
+struct octeon_boot_descriptor {
+       /* Start of block referenced by assembly code - do not change! */
+       u32 desc_version;
+       u32 desc_size;
+       u64 stack_top;
+       u64 heap_base;
+       u64 heap_end;
+       u64 deprecated17;
+       u64 deprecated16;
+       /* End of block referenced by assembly code - do not change! */
+       u32 deprecated18;
+       u32 deprecated15;
+       u32 deprecated14;
+       u32 argc;  /* argc for main() */
+       u32 argv[OCTEON_ARGV_MAX_ARGS];  /* argv for main() */
+       u32 flags;   /* Flags for application */
+       u32 core_mask;   /* Coremask running this image */
+       u32 dram_size;  /* DEPRECATED, DRAM size in megabytes. Used up to SDK 1.8.1 */
+       u32 phy_mem_desc_addr;
+       u32 debugger_flags_base_addr;  /* used to pass flags from app to debugger. */
+       u32 eclock_hz;  /* CPU clock speed, in Hz. */
+       u32 deprecated10;
+       u32 deprecated9;
+       u16 deprecated8;
+       u8  deprecated7;
+       u8  deprecated6;
+       u16 deprecated5;
+       u8  deprecated4;
+       u8  deprecated3;
+       char deprecated2[OCTEON_SERIAL_LEN];
+       u8  deprecated1[6];
+       u8  deprecated0;
+       u64 cvmx_desc_vaddr;  /* Address of cvmx descriptor */
+};
+
+static struct octeon_boot_descriptor boot_desc[CVMX_MIPS_MAX_CORES];
+static struct cvmx_bootinfo cvmx_bootinfo_array[CVMX_MIPS_MAX_CORES];
+
+/**
+ * Programs the boot bus moveable region
+ * @param      base    base address to place the boot bus moveable region
+ *                     (bits [31:7])
+ * @param      region_num      Selects which region, 0 or 1 for node 0,
+ *                             2 or 3 for node 1
+ * @param      enable          Set true to enable, false to disable
+ * @param      data            Pointer to data to put in the region, up to
+ *                             16 dwords.
+ * @param      num_words       Number of data dwords (up to 32)
+ *
+ * @return     0 for success, -1 on error
+ */
+static int octeon_set_moveable_region(u32 base, int region_num,
+                                     bool enable, const u64 *data,
+                                     unsigned int num_words)
+{
+       int node = region_num >> 1;
+       u64 val;
+       int i;
+       u8 node_mask = 0x01;    /* ToDo: Currently only one node is supported */
+
+       debug("%s(0x%x, %d, %d, %p, %u)\n", __func__, base, region_num, enable,
+             data, num_words);
+
+       if (num_words > 32) {
+               printf("%s: Too many words (%d) for region %d\n", __func__,
+                      num_words, region_num);
+               return -1;
+       }
+
+       if (base & 0x7f) {
+               printf("%s: Error: base address 0x%x must be 128 byte aligned\n",
+                      __func__, base);
+               return -1;
+       }
+
+       if (region_num > (node_mask > 1 ? 3 : 1)) {
+               printf("%s: Region number %d out of range\n",
+                      __func__, region_num);
+               return -1;
+       }
+
+       if (!data && num_words > 0) {
+               printf("%s: Error: NULL data\n", __func__);
+               return -1;
+       }
+
+       region_num &= 1;
+
+       val = MIO_BOOT_LOC_CFG_EN |
+               FIELD_PREP(MIO_BOOT_LOC_CFG_BASE, base >> 7);
+       debug("%s: Setting MIO_BOOT_LOC_CFG(%d) on node %d to 0x%llx\n",
+             __func__, region_num, node, val);
+       csr_wr(CVMX_MIO_BOOT_LOC_CFGX(region_num & 1), val);
+
+       val = FIELD_PREP(MIO_BOOT_LOC_ADR_ADR, (region_num ? 0x80 : 0x00) >> 3);
+       debug("%s: Setting MIO_BOOT_LOC_ADR start to 0x%llx\n", __func__, val);
+       csr_wr(CVMX_MIO_BOOT_LOC_ADR, val);
+
+       for (i = 0; i < num_words; i++) {
+               debug("  0x%02llx: 0x%016llx\n",
+                     csr_rd(CVMX_MIO_BOOT_LOC_ADR), data[i]);
+               csr_wr(CVMX_MIO_BOOT_LOC_DAT, data[i]);
+       }
+
+       return 0;
+}
+
+/**
+ * Parse comma separated numbers into an array
+ *
+ * @param[out] values values read for each node
+ * @param[in] str string to parse
+ * @param base 0 for auto, otherwise 8, 10 or 16 for the number base
+ *
+ * @return number of values read.
+ */
+static int octeon_parse_nodes(u64 values[CVMX_MAX_NODES],
+                             const char *str, int base)
+{
+       int node = 0;
+       char *sep;
+
+       do {
+               debug("Parsing node %d: \"%s\"\n", node, str);
+               values[node] = simple_strtoull(str, &sep, base);
+               debug("  node %d: 0x%llx\n", node, values[node]);
+               str = sep + 1;
+       } while (++node < CVMX_MAX_NODES && *sep == ',');
+
+       debug("%s: returning %d\n", __func__, node);
+       return node;
+}
+
+/**
+ * Parse command line arguments
+ *
+ * @param argc                 number of arguments
+ * @param[in] argv             array of argument strings
+ * @param cmd                  command type
+ * @param[out] boot_args       parsed values
+ *
+ * @return number of arguments parsed
+ */
+int octeon_parse_bootopts(int argc, char *const argv[],
+                         enum octeon_boot_cmd_type cmd,
+                         struct octeon_boot_args *boot_args)
+{
+       u64 node_values[CVMX_MAX_NODES];
+       int arg, j;
+       int num_values;
+       int node;
+       u8 node_mask = 0x01;    /* ToDo: Currently only one node is supported */
+
+       debug("%s(%d, %p, %d, %p)\n", __func__, argc, argv, cmd, boot_args);
+       memset(boot_args, 0, sizeof(*boot_args));
+       boot_args->stack_size = DEFAULT_STACK_SIZE;
+       boot_args->heap_size = DEFAULT_HEAP_SIZE;
+       boot_args->node_mask = 0;
+
+       for (arg = 0; arg < argc; arg++) {
+               debug("  argv[%d]: %s\n", arg, argv[arg]);
+               if (cmd == BOOTOCT && !strncmp(argv[arg], "stack=", 6)) {
+                       boot_args->stack_size = simple_strtoul(argv[arg] + 6,
+                                                              NULL, 0);
+               } else if (cmd == BOOTOCT && !strncmp(argv[arg], "heap=", 5)) {
+                       boot_args->heap_size = simple_strtoul(argv[arg] + 5,
+                                                             NULL, 0);
+               } else if (!strncmp(argv[arg], "debug", 5)) {
+                       puts("setting debug flag!\n");
+                       boot_args->boot_flags |= OCTEON_BL_FLAG_DEBUG;
+               } else if (cmd == BOOTOCT && !strncmp(argv[arg], "break", 5)) {
+                       puts("setting break flag!\n");
+                       boot_args->boot_flags |= OCTEON_BL_FLAG_BREAK;
+               } else if (!strncmp(argv[arg], "forceboot", 9)) {
+                       boot_args->forceboot = true;
+               } else if (!strncmp(argv[arg], "nodemask=", 9)) {
+                       boot_args->node_mask = simple_strtoul(argv[arg] + 9,
+                                                             NULL, 16);
+               } else if (!strncmp(argv[arg], "numcores=", 9)) {
+                       memset(node_values, 0, sizeof(node_values));
+                       num_values = octeon_parse_nodes(node_values,
+                                                       argv[arg] + 9, 0);
+                       for (j = 0; j < num_values; j++)
+                               boot_args->num_cores[j] = node_values[j];
+                       boot_args->num_cores_set = true;
+               } else if (!strncmp(argv[arg], "skipcores=", 10)) {
+                       memset(node_values, 0, sizeof(node_values));
+                       num_values = octeon_parse_nodes(node_values,
+                                                       argv[arg] + 10, 0);
+                       for (j = 0; j < num_values; j++)
+                               boot_args->num_skipped[j] = node_values[j];
+                       boot_args->num_skipped_set = true;
+               } else if (!strncmp(argv[arg], "console_uart=", 13)) {
+                       boot_args->console_uart = simple_strtoul(argv[arg] + 13,
+                                                                NULL, 0);
+                       if (boot_args->console_uart == 1) {
+                               boot_args->boot_flags |=
+                                       OCTEON_BL_FLAG_CONSOLE_UART1;
+                       } else if (!boot_args->console_uart) {
+                               boot_args->boot_flags &=
+                                       ~OCTEON_BL_FLAG_CONSOLE_UART1;
+                       }
+               } else if (!strncmp(argv[arg], "coremask=", 9)) {
+                       memset(node_values, 0, sizeof(node_values));
+                       num_values = octeon_parse_nodes(node_values,
+                                                       argv[arg] + 9, 16);
+                       for (j = 0; j < num_values; j++)
+                               cvmx_coremask_set64_node(&boot_args->coremask,
+                                                        j, node_values[j]);
+                       boot_args->coremask_set = true;
+               } else if (cmd == BOOTOCTLINUX &&
+                          !strncmp(argv[arg], "namedblock=", 11)) {
+                       boot_args->named_block = argv[arg] + 11;
+               } else if (!strncmp(argv[arg], "endbootargs", 11)) {
+                       boot_args->endbootargs = 1;
+                       arg++;
+                       if (argc >= arg && cmd != BOOTOCTLINUX)
+                               boot_args->app_name = argv[arg];
+                       break;
+               } else {
+                       debug(" Unknown argument \"%s\"\n", argv[arg]);
+               }
+       }
+
+       if (boot_args->coremask_set && boot_args->num_cores_set) {
+               puts("Warning: both coremask and numcores are set, using coremask.\n");
+       } else if (!boot_args->coremask_set && !boot_args->num_cores_set) {
+               cvmx_coremask_set_core(&boot_args->coremask, 0);
+               boot_args->coremask_set = true;
+       } else if ((!boot_args->coremask_set) && boot_args->num_cores_set) {
+               cvmx_coremask_for_each_node(node, node_mask)
+                       cvmx_coremask_set64_node(&boot_args->coremask, node,
+                               ((1ull << boot_args->num_cores[node]) - 1) <<
+                                       boot_args->num_skipped[node]);
+               boot_args->coremask_set = true;
+       }
+
+       /* Update the node mask based on the coremask or the number of cores */
+       for (j = 0; j < CVMX_MAX_NODES; j++) {
+               if (cvmx_coremask_get64_node(&boot_args->coremask, j))
+                       boot_args->node_mask |= 1 << j;
+       }
+
+       debug("%s: return %d\n", __func__, arg);
+       return arg;
+}
+
+int do_bootoctlinux(struct cmd_tbl *cmdtp, int flag, int argc,
+                   char *const argv[])
+{
+       typedef void __noreturn (*kernel_entry_t)(int, ulong, ulong, ulong);
+       kernel_entry_t kernel;
+       struct octeon_boot_args boot_args;
+       int arg_start = 1;
+       int arg_count;
+       u64 addr = 0;           /* Address of the ELF image     */
+       int arg0;
+       u64 arg1;
+       u64 arg2;
+       u64 arg3;
+       int ret;
+       struct cvmx_coremask core_mask;
+       struct cvmx_coremask coremask_to_run;
+       struct cvmx_coremask avail_coremask;
+       int first_core;
+       int core;
+       struct ram_info ram;
+       struct udevice *dev;
+       const u64 *nmi_code;
+       int num_dwords;
+       u8 node_mask = 0x01;
+       int i;
+
+       cvmx_coremask_clear_all(&core_mask);
+       cvmx_coremask_clear_all(&coremask_to_run);
+
+       if (argc >= 2 && (isxdigit(argv[1][0]) && (isxdigit(argv[1][1]) ||
+                                                  argv[1][1] == 'x' ||
+                                                  argv[1][1] == 'X' ||
+                                                  argv[1][1] == '\0'))) {
+               addr = simple_strtoul(argv[1], NULL, 16);
+               if (!addr)
+                       addr = CONFIG_SYS_LOAD_ADDR;
+               arg_start++;
+       }
+       if (addr == 0)
+               addr = CONFIG_SYS_LOAD_ADDR;
+
+       debug("%s: arg start: %d\n", __func__, arg_start);
+       arg_count = octeon_parse_bootopts(argc - arg_start, argv + arg_start,
+                                         BOOTOCTLINUX, &boot_args);
+
+       debug("%s:\n"
+             " named block: %s\n"
+             " node mask: 0x%x\n"
+             " stack size: 0x%x\n"
+             " heap size: 0x%x\n"
+             " boot flags: 0x%x\n"
+             " force boot: %s\n"
+             " coremask set: %s\n"
+             " num cores set: %s\n"
+             " num skipped set: %s\n"
+             " endbootargs: %s\n",
+             __func__,
+             boot_args.named_block ? boot_args.named_block : "none",
+             boot_args.node_mask,
+             boot_args.stack_size,
+             boot_args.heap_size,
+             boot_args.boot_flags,
+             boot_args.forceboot ? "true" : "false",
+             boot_args.coremask_set ? "true" : "false",
+             boot_args.num_cores_set ? "true" : "false",
+             boot_args.num_skipped_set ? "true" : "false",
+             boot_args.endbootargs ? "true" : "false");
+       debug(" num cores: ");
+       for (i = 0; i < CVMX_MAX_NODES; i++)
+               debug("%s%d", i > 0 ? ", " : "", boot_args.num_cores[i]);
+       debug("\n num skipped: ");
+       for (i = 0; i < CVMX_MAX_NODES; i++)
+               debug("%s%d", i > 0 ? ", " : "", boot_args.num_skipped[i]);
+
+       debug("\n coremask:\n");
+       cvmx_coremask_dprint(&boot_args.coremask);
+
+       if (boot_args.endbootargs) {
+               debug("endbootargs set, adjusting argc from %d to %d, arg_count: %d, arg_start: %d\n",
+                     argc, argc - (arg_count + arg_start), arg_count,
+                     arg_start);
+               argc -= (arg_count + arg_start);
+               argv += (arg_count + arg_start);
+       }
+
+       /*
+        * numcores specification overrides a coremask on the same command line
+        */
+       cvmx_coremask_copy(&core_mask, &boot_args.coremask);
+
+       /*
+        * Remove cores from coremask based on environment variable stored in
+        * flash
+        */
+       if (validate_coremask(&core_mask) != 0) {
+               puts("Invalid coremask.\n");
+               return 1;
+       } else if (cvmx_coremask_is_empty(&core_mask)) {
+               puts("Coremask is empty after coremask_override mask.  Nothing to do.\n");
+               return 0;
+       }
+
+       if (cvmx_coremask_intersects(&core_mask, &coremask_to_run)) {
+               puts("ERROR: Can't load code on core twice!  Provided coremask:\n");
+               cvmx_coremask_print(&core_mask);
+               puts("overlaps previously loaded coremask:\n");
+               cvmx_coremask_print(&coremask_to_run);
+               return -1;
+       }
+
+       debug("Setting up boot descriptor block with core mask:\n");
+       cvmx_coremask_dprint(&core_mask);
+
+       /*
+        * Add coremask to global mask of cores that have been set up and are
+        * runnable
+        */
+       cvmx_coremask_or(&coremask_to_run, &coremask_to_run, &core_mask);
+
+       /* Get RAM size */
+       ret = uclass_get_device(UCLASS_RAM, 0, &dev);
+       if (ret) {
+               debug("DRAM init failed: %d\n", ret);
+               return ret;
+       }
+
+       ret = ram_get_info(dev, &ram);
+       if (ret) {
+               debug("Cannot get DRAM size: %d\n", ret);
+               return ret;
+       }
+
+       /*
+        * Load kernel ELF image, or try binary if ELF is not detected.
+        * This way the much smaller vmlinux.bin can also be started but
+        * has to be loaded at the correct address (ep as parameter).
+        */
+       if (!valid_elf_image(addr))
+               printf("Booting binary image instead (vmlinux.bin)...\n");
+       else
+               addr = load_elf_image_shdr(addr);
+
+       /* Set kernel entry point */
+       kernel = (kernel_entry_t)addr;
+
+       /* Init bootmem list for Linux kernel booting */
+       if (!cvmx_bootmem_phy_mem_list_init(
+                   ram.size, OCTEON_RESERVED_LOW_MEM_SIZE,
+                   (void *)CKSEG0ADDR(BOOTLOADER_BOOTMEM_DESC_SPACE))) {
+               printf("FATAL: Error initializing free memory list\n");
+               return 0;
+       }
+
+       first_core = cvmx_coremask_get_first_core(&coremask_to_run);
+
+       cvmx_coremask_for_each_core(core, &coremask_to_run) {
+               debug("%s: Activating core %d\n",  __func__, core);
+
+               cvmx_bootinfo_array[core].core_mask =
+                       cvmx_coremask_get32(&coremask_to_run);
+               cvmx_coremask_copy(&cvmx_bootinfo_array[core].ext_core_mask,
+                                  &coremask_to_run);
+
+               if (core == first_core)
+                       cvmx_bootinfo_array[core].flags |= BOOT_FLAG_INIT_CORE;
+
+               cvmx_bootinfo_array[core].dram_size = ram.size / (1024 * 1024);
+
+               cvmx_bootinfo_array[core].dclock_hz = gd->mem_clk * 1000000;
+               cvmx_bootinfo_array[core].eclock_hz = gd->cpu_clk;
+
+               cvmx_bootinfo_array[core].led_display_base_addr = 0;
+               cvmx_bootinfo_array[core].phy_mem_desc_addr =
+                       ((u32)(u64)__cvmx_bootmem_internal_get_desc_ptr()) &
+                       0x7ffffff;
+
+               cvmx_bootinfo_array[core].major_version = CVMX_BOOTINFO_MAJ_VER;
+               cvmx_bootinfo_array[core].minor_version = CVMX_BOOTINFO_MIN_VER;
+               cvmx_bootinfo_array[core].fdt_addr = virt_to_phys(gd->fdt_blob);
+
+               boot_desc[core].dram_size = gd->ram_size / (1024 * 1024);
+               boot_desc[core].cvmx_desc_vaddr =
+                       virt_to_phys(&cvmx_bootinfo_array[core]);
+
+               boot_desc[core].desc_version = OCTEON_CURRENT_DESC_VERSION;
+               boot_desc[core].desc_size = sizeof(boot_desc[0]);
+
+               boot_desc[core].flags = cvmx_bootinfo_array[core].flags;
+               boot_desc[core].eclock_hz = cvmx_bootinfo_array[core].eclock_hz;
+
+               boot_desc[core].argc = argc;
+               for (i = 0; i < argc; i++)
+                       boot_desc[core].argv[i] = (u32)virt_to_phys(argv[i]);
+       }
+
+       core = 0;
+       arg0 = argc;
+       arg1 = (u64)argv;
+       arg2 = 0x1;     /* Core 0 sets init core for Linux */
+       arg3 = XKPHYS | virt_to_phys(&boot_desc[core]);
+
+       debug("## Transferring control to Linux (at address %p) ...\n", kernel);
+
+       /*
+        * Flush cache before jumping to application. Let's flush the
+        * whole SDRAM area, since we don't know the size of the image
+        * that was loaded.
+        */
+       flush_cache(gd->ram_base, gd->ram_top - gd->ram_base);
+
+       /* Take all cores out of reset */
+       csr_wr(CVMX_CIU_PP_RST, 0);
+       sync();
+
+       /* Wait a short while for the other cores... */
+       mdelay(100);
+
+       /* Install boot code into moveable bus for NMI (other cores) */
+       nmi_code = (const u64 *)nmi_bootvector;
+       num_dwords = (((u64)&nmi_handler_para[0] - (u64)nmi_code) + 7) / 8;
+
+       ret = octeon_set_moveable_region(0x1fc00000, 0, true, nmi_code,
+                                        num_dwords);
+       if (ret) {
+               printf("Error installing NMI handler for SMP core startup\n");
+               return 0;
+       }
+
+       /* Write NMI handler parameters for Linux kernel booting */
+       nmi_handler_para[0] = (u64)kernel;
+       nmi_handler_para[1] = arg0;
+       nmi_handler_para[2] = arg1;
+       nmi_handler_para[3] = 0; /* Don't set init core for secondary cores */
+       nmi_handler_para[4] = arg3;
+       sync();
+
+       /* Wait a short while for the other cores... */
+       mdelay(100);
+
+       /*
+        * Cores have already been taken out of reset to conserve power.
+        * We need to send a NMI to get the cores out of their wait loop
+        */
+       octeon_get_available_coremask(&avail_coremask);
+       debug("Available coremask:\n");
+       cvmx_coremask_dprint(&avail_coremask);
+       debug("Starting coremask:\n");
+       cvmx_coremask_dprint(&coremask_to_run);
+       debug("Sending NMIs to other cores\n");
+       if (octeon_has_feature(OCTEON_FEATURE_CIU3)) {
+               u64 avail_cm;
+               int node;
+
+               cvmx_coremask_for_each_node(node, node_mask) {
+                       avail_cm = cvmx_coremask_get64_node(&avail_coremask,
+                                                           node);
+
+                       if (avail_cm != 0) {
+                               debug("Sending NMI to node %d, coremask=0x%llx, CIU3_NMI=0x%llx\n",
+                                     node, avail_cm,
+                                     (node > 0 ? -1ull : -2ull) & avail_cm);
+                               csr_wr(CVMX_CIU3_NMI,
+                                      (node > 0 ? -1ull : -2ull) & avail_cm);
+                       }
+               }
+       } else {
+               csr_wr(CVMX_CIU_NMI,
+                      -2ull & cvmx_coremask_get64(&avail_coremask));
+       }
+       debug("Done sending NMIs\n");
+
+       /* Wait a short while for the other cores... */
+       mdelay(100);
+
+       /*
+        * pass address parameter as argv[0] (aka command name),
+        * and all remaining args
+        * a0 = argc
+        * a1 = argv (32 bit physical addresses, not pointers)
+        * a2 = init core
+        * a3 = boot descriptor address
+        * a4/t0 = entry point (only used by assembly stub)
+        */
+       kernel(arg0, arg1, arg2, arg3);
+
+       return 0;
+}
+
+U_BOOT_CMD(bootoctlinux, 32, 0, do_bootoctlinux,
+          "Boot from a Linux ELF image in memory",
+          "elf_address [coremask=mask_to_run | numcores=core_cnt_to_run] "
+          "[forceboot] [skipcores=core_cnt_to_skip] [namedblock=name] [endbootargs] [app_args ...]\n"
+          "elf_address - address of ELF image to load. If 0, default load address\n"
+          "              is used.\n"
+          "coremask    - mask of cores to run on.  Anded with coremask_override\n"
+          "              environment variable to ensure only working cores are used\n"
+          "numcores    - number of cores to run on.  Runs on specified number of cores,\n"
+          "              taking into account the coremask_override.\n"
+          "skipcores   - only meaningful with numcores.  Skips this many cores\n"
+          "              (starting from 0) when loading the numcores cores.\n"
+          "              For example, setting skipcores to 1 will skip core 0\n"
+          "              and load the application starting at the next available core.\n"
+          "forceboot   - if set, boots application even if core 0 is not in mask\n"
+          "namedblock  - specifies a named block to load the kernel\n"
+          "endbootargs - if set, bootloader does not process any further arguments and\n"
+          "              only passes the arguments that follow to the kernel.\n"
+          "              If not set, the kernel gets the entire command line as\n"
+          "              arguments.\n" "\n");
diff --git a/arch/mips/mach-octeon/cache.c b/arch/mips/mach-octeon/cache.c
index 9a88bb9..f293d65 100644
@@ -5,14 +5,13 @@
 
 #include <cpu_func.h>
 
-/*
- * The Octeon platform is cache coherent and cache flushes and invalidates
- * are not needed. Define some platform specific empty flush_foo()
- * functions here to overwrite the _weak common function as a no-op.
- * This effectively disables all cache operations.
- */
+/* Octeon memory write barrier */
+#define CVMX_SYNCW     asm volatile ("syncw\nsyncw\n" : : : "memory")
+
 void flush_dcache_range(ulong start_addr, ulong stop)
 {
+       /* Flush all pending writes */
+       CVMX_SYNCW;
 }
 
 void flush_cache(ulong start_addr, ulong size)
@@ -21,4 +20,5 @@ void flush_cache(ulong start_addr, ulong size)
 
 void invalidate_dcache_range(ulong start_addr, ulong stop)
 {
+       /* Don't need to do anything for OCTEON */
 }
diff --git a/arch/mips/mach-octeon/cpu.c b/arch/mips/mach-octeon/cpu.c
index 2680a2e..6f87a4e 100644
 
 DECLARE_GLOBAL_DATA_PTR;
 
+/*
+ * TRUE for devices having registers with little-endian byte
+ * order, FALSE for registers with native-endian byte order.
+ * PCI mandates little-endian, USB and SATA are configurable,
+ * but we chose little-endian for these.
+ *
+ * This table will be referenced in the Octeon platform-specific
+ * mangle-port.h header.
+ */
+const bool octeon_should_swizzle_table[256] = {
+       [0x00] = true,  /* bootbus/CF */
+       [0x1b] = true,  /* PCI mmio window */
+       [0x1c] = true,  /* PCI mmio window */
+       [0x1d] = true,  /* PCI mmio window */
+       [0x1e] = true,  /* PCI mmio window */
+       [0x68] = true,  /* OCTEON III USB */
+       [0x69] = true,  /* OCTEON III USB */
+       [0x6c] = true,  /* OCTEON III SATA */
+       [0x6f] = true,  /* OCTEON II USB */
+};
+
 static int get_clocks(void)
 {
        const u64 ref_clock = PLL_REF_CLK;
diff --git a/arch/mips/mach-octeon/cvmx-bootmem.c b/arch/mips/mach-octeon/cvmx-bootmem.c
new file mode 100644
index 0000000..80bb7ac
--- /dev/null
@@ -0,0 +1,1460 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018-2020 Marvell International Ltd.
+ */
+
+/*
+ * Simple allocate-only memory allocator. Used to allocate memory at
+ * application start time.
+ */
+
+#include <asm/global_data.h>
+
+#include <linux/compat.h>
+#include <linux/io.h>
+#include <linux/types.h>
+
+#include <mach/octeon-model.h>
+#include <mach/cvmx-bootmem.h>
+#include <mach/cvmx-coremask.h>
+#include <mach/cvmx-regs.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+#define CVMX_MIPS32_SPACE_KSEG0                1L
+#define CVMX_MIPS_SPACE_XKPHYS         2LL
+
+#define CVMX_ADD_SEG(seg, add)         ((((u64)(seg)) << 62) | (add))
+#define CVMX_ADD_SEG32(seg, add)       (((u32)(seg) << 31) | (u32)(add))
+
+/**
+ * This is the physical location of a struct cvmx_bootmem_desc
+ * structure in Octeon's memory. Note that due to addressing
+ * limits or runtime environment it might not be possible to
+ * create a C pointer to this structure.
+ */
+static u64 cvmx_bootmem_desc_addr;
+
+/**
+ * This macro returns the size of a member of a structure.
+ * Logically it is the same as "sizeof(s::field)" in C++, but
+ * C lacks the "::" operator.
+ */
+#define SIZEOF_FIELD(s, field) sizeof(((s *)NULL)->field)
+
+/**
+ * This macro returns a member of the struct cvmx_bootmem_desc
+ * structure. These members can't be directly addressed as
+ * they might be in memory not directly reachable. In the case
+ * where bootmem is compiled with LINUX_HOST, the structure
+ * itself might be located on a remote Octeon. The argument
+ * "field" is the member name of the struct cvmx_bootmem_desc to read.
+ * Regardless of the type of the field, the return type is always
+ * a u64.
+ */
+#define CVMX_BOOTMEM_DESC_GET_FIELD(field)                             \
+       __cvmx_bootmem_desc_get(cvmx_bootmem_desc_addr,                 \
+                               offsetof(struct cvmx_bootmem_desc, field), \
+                               SIZEOF_FIELD(struct cvmx_bootmem_desc, field))
+
+/**
+ * This macro writes a member of the struct cvmx_bootmem_desc
+ * structure. These members can't be directly addressed as
+ * they might be in memory not directly reachable. In the case
+ * where bootmem is compiled with LINUX_HOST, the structure
+ * itself might be located on a remote Octeon. The argument
+ * "field" is the member name of the struct cvmx_bootmem_desc to write.
+ */
+#define CVMX_BOOTMEM_DESC_SET_FIELD(field, value)                      \
+       __cvmx_bootmem_desc_set(cvmx_bootmem_desc_addr,                 \
+                               offsetof(struct cvmx_bootmem_desc, field), \
+                               SIZEOF_FIELD(struct cvmx_bootmem_desc, field), \
+                               value)
+
+/**
+ * This macro returns a member of the
+ * struct cvmx_bootmem_named_block_desc structure. These members can't
+ * be directly addressed as they might be in memory not directly
+ * reachable. In the case where bootmem is compiled with
+ * LINUX_HOST, the structure itself might be located on a remote
+ * Octeon. The argument "field" is the member name of the
+ * struct cvmx_bootmem_named_block_desc to read. Regardless of the type
+ * of the field, the return type is always a u64. The "addr"
+ * parameter is the physical address of the structure.
+ */
+#define CVMX_BOOTMEM_NAMED_GET_FIELD(addr, field)                      \
+       __cvmx_bootmem_desc_get(addr,                                   \
+               offsetof(struct cvmx_bootmem_named_block_desc,  field), \
+               SIZEOF_FIELD(struct cvmx_bootmem_named_block_desc, field))
+
+/**
+ * This macro writes a member of the struct cvmx_bootmem_named_block_desc
+ * structure. These members can't be directly addressed as
+ * they might be in memory not directly reachable. In the case
+ * where bootmem is compiled with LINUX_HOST, the structure
+ * itself might be located on a remote Octeon. The argument
+ * "field" is the member name of the
+ * struct cvmx_bootmem_named_block_desc to write. The "addr" parameter
+ * is the physical address of the structure.
+ */
+#define CVMX_BOOTMEM_NAMED_SET_FIELD(addr, field, value)               \
+       __cvmx_bootmem_desc_set(addr,                                   \
+               offsetof(struct cvmx_bootmem_named_block_desc, field),  \
+               SIZEOF_FIELD(struct cvmx_bootmem_named_block_desc, field), \
+                               value)
+
+/**
+ * This function is the implementation of the get macros defined
+ * for individual structure members. The argument are generated
+ * by the macros inorder to read only the needed memory.
+ *
+ * @param base   64bit physical address of the complete structure
+ * @param offset Offset from the beginning of the structure to the member being
+ *               accessed.
+ * @param size   Size of the structure member.
+ *
+ * @return Value of the structure member promoted into a u64.
+ */
+static inline u64 __cvmx_bootmem_desc_get(u64 base, int offset,
+                                         int size)
+{
+       base = (1ull << 63) | (base + offset);
+       switch (size) {
+       case 4:
+               return cvmx_read64_uint32(base);
+       case 8:
+               return cvmx_read64_uint64(base);
+       default:
+               return 0;
+       }
+}
+
+/**
+ * This function is the implementation of the set macros defined
+ * for individual structure members. The arguments are generated
+ * by the macros in order to write only the needed memory.
+ *
+ * @param base   64bit physical address of the complete structure
+ * @param offset Offset from the beginning of the structure to the member being
+ *               accessed.
+ * @param size   Size of the structure member.
+ * @param value  Value to write into the structure
+ */
+static inline void __cvmx_bootmem_desc_set(u64 base, int offset, int size,
+                                          u64 value)
+{
+       base = (1ull << 63) | (base + offset);
+       switch (size) {
+       case 4:
+               cvmx_write64_uint32(base, value);
+               break;
+       case 8:
+               cvmx_write64_uint64(base, value);
+               break;
+       default:
+               break;
+       }
+}
+
+/**
+ * This function returns the address of the bootmem descriptor lock.
+ *
+ * @return 64-bit address in KSEG0 of the bootmem descriptor block
+ */
+static inline u64 __cvmx_bootmem_get_lock_addr(void)
+{
+       return (1ull << 63) |
+               (cvmx_bootmem_desc_addr + offsetof(struct cvmx_bootmem_desc, lock));
+}
+
+/**
+ * This function retrieves the string name of a named block. It is
+ * more complicated than a simple memcpy() since the named block
+ * descriptor may not be directly accessible.
+ *
+ * @param addr   Physical address of the named block descriptor
+ * @param str    String to receive the named block string name
+ * @param len    Length of the string buffer, which must match the length
+ *               stored in the bootmem descriptor.
+ */
+static void CVMX_BOOTMEM_NAMED_GET_NAME(u64 addr, char *str, int len)
+{
+       int l = len;
+       char *ptr = str;
+
+       addr |= (1ull << 63);
+       addr += offsetof(struct cvmx_bootmem_named_block_desc, name);
+       while (l) {
+               /*
+                * With big-endian in-memory byte order, this gives uniform
+                * results for the CPU in either big- or little-endian mode.
+                */
+               u64 blob = cvmx_read64_uint64(addr);
+               int sa = 56;
+
+               addr += sizeof(u64);
+               while (l && sa >= 0) {
+                       *ptr++ = (char)(blob >> sa);
+                       l--;
+                       sa -= 8;
+               }
+       }
+       str[len] = 0;
+}
+
+/**
+ * This function stores the string name of a named block. It is
+ * more complicated than a simple memcpy() since the named block
+ * descriptor may not be directly accessible.
+ *
+ * @param addr   Physical address of the named block descriptor
+ * @param str    String to store into the named block string name
+ * @param len    Length of the string buffer, which must match the length
+ *               stored in the bootmem descriptor.
+ */
+void CVMX_BOOTMEM_NAMED_SET_NAME(u64 addr, const char *str, int len)
+{
+       int l = len;
+
+       addr |= (1ull << 63);
+       addr += offsetof(struct cvmx_bootmem_named_block_desc, name);
+
+       while (l) {
+               /*
+                * With big-endian in-memory byte order, this gives uniform
+                * results for the CPU in either big- or little-endian mode.
+                */
+               u64 blob = 0;
+               int sa = 56;
+
+               while (l && sa >= 0) {
+                       u64 c = (u8)(*str++);
+
+                       l--;
+                       if (l == 0)
+                               c = 0;
+                       blob |= c << sa;
+                       sa -= 8;
+               }
+               cvmx_write64_uint64(addr, blob);
+               addr += sizeof(u64);
+       }
+}
+
+/* See header file for descriptions of functions */
+
+/*
+ * Wrapper functions are provided for reading/writing the size and next block
+ * values as these may not be directly addressable (in 32-bit applications, for
+ * instance).
+ *
+ * Offsets of data elements in bootmem list, must match
+ * struct cvmx_bootmem_block_header
+ */
+#define NEXT_OFFSET 0
+#define SIZE_OFFSET 8
+
+static void cvmx_bootmem_phy_set_size(u64 addr, u64 size)
+{
+       cvmx_write64_uint64((addr + SIZE_OFFSET) | (1ull << 63), size);
+}
+
+static void cvmx_bootmem_phy_set_next(u64 addr, u64 next)
+{
+       cvmx_write64_uint64((addr + NEXT_OFFSET) | (1ull << 63), next);
+}
+
+static u64 cvmx_bootmem_phy_get_size(u64 addr)
+{
+       return cvmx_read64_uint64((addr + SIZE_OFFSET) | (1ull << 63));
+}
+
+static u64 cvmx_bootmem_phy_get_next(u64 addr)
+{
+       return cvmx_read64_uint64((addr + NEXT_OFFSET) | (1ull << 63));
+}
+
+/**
+ * Check the version information on the bootmem descriptor
+ *
+ * @param exact_match
+ *               Exact major version to check against. A zero means
+ *               check that the version supports named blocks.
+ *
+ * @return Zero if the version is correct. Negative if the version is
+ *         incorrect. Failures also cause a message to be displayed.
+ */
+static int __cvmx_bootmem_check_version(int exact_match)
+{
+       int major_version;
+
+       major_version = CVMX_BOOTMEM_DESC_GET_FIELD(major_version);
+       if (major_version > 3 ||
+           (exact_match && major_version != exact_match)) {
+               debug("ERROR: Incompatible bootmem descriptor version: %d.%d at addr: 0x%llx\n",
+                     major_version,
+                     (int)CVMX_BOOTMEM_DESC_GET_FIELD(minor_version),
+                     CAST_ULL(cvmx_bootmem_desc_addr));
+               return -1;
+       } else {
+               return 0;
+       }
+}
+
+/**
+ * Get the low level bootmem descriptor lock. If no locking
+ * is specified in the flags, then nothing is done.
+ *
+ * @param flags  CVMX_BOOTMEM_FLAG_NO_LOCKING means this function should do
+ *               nothing. This is used to support nested bootmem calls.
+ */
+static inline void __cvmx_bootmem_lock(u32 flags)
+{
+       if (!(flags & CVMX_BOOTMEM_FLAG_NO_LOCKING)) {
+               /*
+                * Unfortunately we can't use the normal cvmx-spinlock code as
+                * the memory for the bootmem descriptor may not be accessible
+                * by a C pointer. We use a 64bit XKPHYS address to access the
+                * memory directly
+                */
+               u64 lock_addr = (1ull << 63) |
+                       (cvmx_bootmem_desc_addr + offsetof(struct cvmx_bootmem_desc,
+                                                          lock));
+               unsigned int tmp;
+
+               __asm__ __volatile__(".set noreorder\n"
+                                    "1: ll   %[tmp], 0(%[addr])\n"
+                                    "   bnez %[tmp], 1b\n"
+                                    "   li   %[tmp], 1\n"
+                                    "   sc   %[tmp], 0(%[addr])\n"
+                                    "   beqz %[tmp], 1b\n"
+                                    "   nop\n"
+                                    ".set reorder\n"
+                                    : [tmp] "=&r"(tmp)
+                                    : [addr] "r"(lock_addr)
+                                    : "memory");
+       }
+}
+
+/**
+ * Release the low level bootmem descriptor lock. If no locking
+ * is specified in the flags, then nothing is done.
+ *
+ * @param flags  CVMX_BOOTMEM_FLAG_NO_LOCKING means this function should do
+ *               nothing. This is used to support nested bootmem calls.
+ */
+static inline void __cvmx_bootmem_unlock(u32 flags)
+{
+       if (!(flags & CVMX_BOOTMEM_FLAG_NO_LOCKING)) {
+               /*
+                * Unfortunately we can't use the normal cvmx-spinlock code as
+                * the memory for the bootmem descriptor may not be accessible
+                * by a C pointer. We use a 64bit XKPHYS address to access the
+                * memory directly
+                */
+               u64 lock_addr = __cvmx_bootmem_get_lock_addr();
+
+               CVMX_SYNCW;
+               __asm__ __volatile__("sw $0, 0(%[addr])\n"
+                                    : : [addr] "r"(lock_addr)
+                                    : "memory");
+               CVMX_SYNCW;
+       }
+}
+
+/*
+ * Some of the cvmx-bootmem functions dealing with C pointers are not
+ * supported when we are compiling for CVMX_BUILD_FOR_LINUX_HOST. This
+ * ifndef removes these functions when they aren't needed.
+ *
+ * This function takes an address range and adjusts it as necessary
+ * to match the ABI that is currently being used.  This is required to
+ * ensure that bootmem_alloc* functions only return valid pointers for
+ * 32-bit ABIs.
+ */
+static int __cvmx_validate_mem_range(u64 *min_addr_ptr,
+                                    u64 *max_addr_ptr)
+{
+       u64 max_phys = (1ull << 29) - 0x10;     /* KSEG0 */
+
+       *min_addr_ptr = min_t(u64, max_t(u64, *min_addr_ptr, 0x0), max_phys);
+       if (!*max_addr_ptr) {
+               *max_addr_ptr = max_phys;
+       } else {
+               *max_addr_ptr = max_t(u64, min_t(u64, *max_addr_ptr,
+                                                max_phys), 0x0);
+       }
+
+       return 0;
+}
+
+u64 cvmx_bootmem_phy_alloc_range(u64 size, u64 alignment,
+                                u64 min_addr, u64 max_addr)
+{
+       s64 address;
+
+       __cvmx_validate_mem_range(&min_addr, &max_addr);
+       address = cvmx_bootmem_phy_alloc(size, min_addr, max_addr,
+                                        alignment, 0);
+       if (address > 0)
+               return address;
+       else
+               return 0;
+}
+
+void *cvmx_bootmem_alloc_range(u64 size, u64 alignment,
+                              u64 min_addr, u64 max_addr)
+{
+       s64 address;
+
+       __cvmx_validate_mem_range(&min_addr, &max_addr);
+       address = cvmx_bootmem_phy_alloc(size, min_addr, max_addr,
+                                        alignment, 0);
+
+       if (address > 0)
+               return cvmx_phys_to_ptr(address);
+       else
+               return NULL;
+}
+
+void *cvmx_bootmem_alloc_address(u64 size, u64 address,
+                                u64 alignment)
+{
+       return cvmx_bootmem_alloc_range(size, alignment, address,
+                                       address + size);
+}
+
+void *cvmx_bootmem_alloc_node(u64 node, u64 size, u64 alignment)
+{
+       return cvmx_bootmem_alloc_range(size, alignment,
+                                       node << CVMX_NODE_MEM_SHIFT,
+                                       ((node + 1) << CVMX_NODE_MEM_SHIFT) - 1);
+}
+
+void *cvmx_bootmem_alloc(u64 size, u64 alignment)
+{
+       return cvmx_bootmem_alloc_range(size, alignment, 0, 0);
+}
+
+void *cvmx_bootmem_alloc_named_range_once(u64 size, u64 min_addr,
+                                         u64 max_addr, u64 align,
+                                         const char *name,
+                                         void (*init)(void *))
+{
+       u64 named_block_desc_addr;
+       void *ptr;
+       s64 addr;
+
+       __cvmx_bootmem_lock(0);
+
+       __cvmx_validate_mem_range(&min_addr, &max_addr);
+       named_block_desc_addr =
+               cvmx_bootmem_phy_named_block_find(name,
+                                                 CVMX_BOOTMEM_FLAG_NO_LOCKING);
+
+       if (named_block_desc_addr) {
+               addr = CVMX_BOOTMEM_NAMED_GET_FIELD(named_block_desc_addr,
+                                                   base_addr);
+               __cvmx_bootmem_unlock(0);
+               return cvmx_phys_to_ptr(addr);
+       }
+
+       addr = cvmx_bootmem_phy_named_block_alloc(size, min_addr, max_addr,
+                                                 align, name,
+                                                 CVMX_BOOTMEM_FLAG_NO_LOCKING);
+
+       if (addr < 0) {
+               __cvmx_bootmem_unlock(0);
+               return NULL;
+       }
+       ptr = cvmx_phys_to_ptr(addr);
+
+       if (init)
+               init(ptr);
+       else
+               memset(ptr, 0, size);
+
+       __cvmx_bootmem_unlock(0);
+       return ptr;
+}
+
+void *cvmx_bootmem_alloc_named_range_flags(u64 size, u64 min_addr,
+                                          u64 max_addr, u64 align,
+                                          const char *name, u32 flags)
+{
+       s64 addr;
+
+       __cvmx_validate_mem_range(&min_addr, &max_addr);
+       addr = cvmx_bootmem_phy_named_block_alloc(size, min_addr, max_addr,
+                                                 align, name, flags);
+       if (addr >= 0)
+               return cvmx_phys_to_ptr(addr);
+       else
+               return NULL;
+}
+
+void *cvmx_bootmem_alloc_named_range(u64 size, u64 min_addr,
+                                    u64 max_addr, u64 align,
+                                    const char *name)
+{
+       return cvmx_bootmem_alloc_named_range_flags(size, min_addr, max_addr,
+                                                   align, name, 0);
+}
+
+void *cvmx_bootmem_alloc_named_address(u64 size, u64 address,
+                                      const char *name)
+{
+       return cvmx_bootmem_alloc_named_range(size, address, address + size,
+                                             0, name);
+}
+
+void *cvmx_bootmem_alloc_named(u64 size, u64 alignment,
+                              const char *name)
+{
+       return cvmx_bootmem_alloc_named_range(size, 0, 0, alignment, name);
+}
+
+void *cvmx_bootmem_alloc_named_flags(u64 size, u64 alignment,
+                                    const char *name, u32 flags)
+{
+       return cvmx_bootmem_alloc_named_range_flags(size, 0, 0, alignment,
+                                                   name, flags);
+}
+
+int cvmx_bootmem_free_named(const char *name)
+{
+       return cvmx_bootmem_phy_named_block_free(name, 0);
+}
+
+/**
+ * Find a named block with flags
+ *
+ * @param name is the block name
+ * @param flags indicates the need to use locking during search
+ * @return pointer to named block descriptor
+ *
+ * Note: this function returns a pointer to a static structure,
+ * and is therefore not re-entrant.
+ * Making this function re-entrant will break backward compatibility.
+ */
+const struct cvmx_bootmem_named_block_desc *
+__cvmx_bootmem_find_named_block_flags(const char *name, u32 flags)
+{
+       static struct cvmx_bootmem_named_block_desc desc;
+       u64 named_addr = cvmx_bootmem_phy_named_block_find(name, flags);
+
+       if (named_addr) {
+               desc.base_addr = CVMX_BOOTMEM_NAMED_GET_FIELD(named_addr,
+                                                             base_addr);
+               desc.size = CVMX_BOOTMEM_NAMED_GET_FIELD(named_addr, size);
+               strncpy(desc.name, name, sizeof(desc.name));
+               desc.name[sizeof(desc.name) - 1] = 0;
+               return &desc;
+       } else {
+               return NULL;
+       }
+}
+
+const struct cvmx_bootmem_named_block_desc *
+cvmx_bootmem_find_named_block(const char *name)
+{
+       return __cvmx_bootmem_find_named_block_flags(name, 0);
+}
+
+void cvmx_bootmem_print_named(void)
+{
+       cvmx_bootmem_phy_named_block_print();
+}
+
+int cvmx_bootmem_init(u64 mem_desc_addr)
+{
+       if (!cvmx_bootmem_desc_addr)
+               cvmx_bootmem_desc_addr = mem_desc_addr;
+
+       return 0;
+}
+
+u64 cvmx_bootmem_available_mem(u64 min_block_size)
+{
+       return cvmx_bootmem_phy_available_mem(min_block_size);
+}
+
+/*
+ * The cvmx_bootmem_phy* functions below return 64 bit physical
+ * addresses, and expose more features than the cvmx_bootmem functions
+ * above.  These are required for full memory space access in 32 bit
+ * applications, as well as for using some advanced features.  Most
+ * applications should not need to use these.
+ */
+
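+/*
+ * Illustrative sketch of how the 64 bit physical address returned by
+ * cvmx_bootmem_phy_alloc() is typically turned into a pointer; this
+ * mirrors cvmx_bootmem_alloc_named_range_flags() above.  The size and
+ * address limit below are arbitrary example values:
+ *
+ *   s64 paddr = cvmx_bootmem_phy_alloc(0x100000, 0, 0x10000000,
+ *                                      CVMX_BOOTMEM_ALIGNMENT_SIZE, 0);
+ *   void *ptr = (paddr >= 0) ? cvmx_phys_to_ptr(paddr) : NULL;
+ */
+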
+s64 cvmx_bootmem_phy_alloc(u64 req_size, u64 address_min,
+                          u64 address_max, u64 alignment,
+                          u32 flags)
+{
+       u64 head_addr, ent_addr, ent_size;
+       u64 target_ent_addr = 0, target_prev_addr = 0;
+       u64 target_size = ~0ull;
+       u64 free_start, free_end;
+       u64 next_addr, prev_addr = 0;
+       u64 new_ent_addr = 0, new_ent_size;
+       u64 desired_min_addr, usable_max;
+       u64 align, align_mask;
+
+       debug("%s: req_size: 0x%llx, min_addr: 0x%llx, max_addr: 0x%llx, align: 0x%llx\n",
+             __func__, CAST_ULL(req_size), CAST_ULL(address_min),
+             CAST_ULL(address_max), CAST_ULL(alignment));
+
+       if (__cvmx_bootmem_check_version(0))
+               return -1;
+
+       /*
+        * Do a variety of checks to validate the arguments.  The
+        * allocator code will later assume that these checks have
+        * been made.  We validate that the requested constraints are
+        * not self-contradictory before we look through the list of
+        * available memory
+        */
+
+       /* 0 is not a valid req_size for this allocator */
+       if (!req_size)
+               return -1;
+
+       /* Round req_size up to multiple of minimum alignment bytes */
+       req_size = (req_size + (CVMX_BOOTMEM_ALIGNMENT_SIZE - 1)) &
+               ~(CVMX_BOOTMEM_ALIGNMENT_SIZE - 1);
+
+       /* Make sure alignment is power of 2, and at least the minimum */
+       for (align = CVMX_BOOTMEM_ALIGNMENT_SIZE;
+            align < (1ull << 48);
+            align <<= 1) {
+               if (align >= alignment)
+                       break;
+       }
+
+       align_mask = ~(align - 1);
+
+       /*
+        * Adjust address minimum based on requested alignment (round
+        * up to meet alignment).  Do this here so we can reject
+        * impossible requests up front. (NOP for address_min == 0)
+        */
+       address_min = (address_min + (align - 1)) & align_mask;
+
+       /*
+        * Convert !0 address_min and 0 address_max to special case of
+        * range that specifies an exact memory block to allocate.  Do
+        * this before other checks and adjustments so that this
+        * transformation will be validated
+        */
+       if (address_min && !address_max)
+               address_max = address_min + req_size;
+       else if (!address_min && !address_max)
+               address_max = ~0ull;    /* If no limits given, use max */
+
+       /*
+        * Reject inconsistent args.  We have adjusted these, so this
+        * may fail due to our internal changes even if this check
+        * would pass for the values the user supplied.
+        */
+       if (req_size > address_max - address_min)
+               return -1;
+
+       __cvmx_bootmem_lock(flags);
+
+       /* Walk through the list entries to find the right fit */
+       head_addr = CVMX_BOOTMEM_DESC_GET_FIELD(head_addr);
+
+       for (ent_addr = head_addr;
+            ent_addr != 0ULL && ent_addr < address_max;
+            prev_addr = ent_addr,
+                    ent_addr = cvmx_bootmem_phy_get_next(ent_addr)) {
+               /* Raw free block size */
+               ent_size = cvmx_bootmem_phy_get_size(ent_addr);
+               next_addr = cvmx_bootmem_phy_get_next(ent_addr);
+
+               /* Validate the free list ascending order */
+               if (ent_size < CVMX_BOOTMEM_ALIGNMENT_SIZE ||
+                   (next_addr && ent_addr > next_addr)) {
+                       debug("ERROR: %s: bad free list ent: %#llx, next: %#llx\n",
+                             __func__, CAST_ULL(ent_addr),
+                             CAST_ULL(next_addr));
+                       goto error_out;
+               }
+
+               /* adjust free block edges for alignment */
+               free_start = (ent_addr + align - 1) & align_mask;
+               free_end = (ent_addr + ent_size) & align_mask;
+
+               /* check that free block is large enough */
+               if ((free_start + req_size) > free_end)
+                       continue;
+
+               /* check that desired start is within the free block */
+               if (free_end < address_min || free_start > address_max)
+                       continue;
+               if ((free_end - address_min) < req_size)
+                       continue;
+               if ((address_max - free_start) < req_size)
+                       continue;
+
+               /* Found usable free block */
+               target_ent_addr = ent_addr;
+               target_prev_addr = prev_addr;
+               target_size = ent_size;
+
+               /* Continue looking for highest/best block that fits */
+       }
+
+       /* Bail if the search has resulted in no eligible free blocks */
+       if (target_ent_addr == 0) {
+               debug("%s: eligible free block not found\n", __func__);
+               goto error_out;
+       }
+
+       /* Found the free block to allocate from */
+       ent_addr = target_ent_addr;
+       prev_addr = target_prev_addr;
+       ent_size = target_size;
+
+       debug("%s: using free block at %#010llx size %#llx\n",
+             __func__, CAST_ULL(ent_addr), CAST_ULL(ent_size));
+
+       /* Always allocate from the end of a free block */
+       usable_max = min_t(u64, address_max, ent_addr + ent_size);
+       desired_min_addr = usable_max - req_size;
+       desired_min_addr &= align_mask;
+
+       /* Split current free block into up to 3 free blocks */
+
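+       /*
+        * Worked example with arbitrary illustrative numbers: for a free
+        * block at 0x1000 of size 0x4000 and a request for 0x800 bytes,
+        * the allocation is taken from the end of the block.  The head
+        * room [0x1000, 0x4800) stays on the free list and the request is
+        * satisfied at 0x4800; there is no tail room in this case.
+        */
+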
+       /* Check for head room */
+       if (desired_min_addr > ent_addr) {
+               /* Create a new free block at the allocation address */
+               new_ent_addr = desired_min_addr;
+               new_ent_size = ent_size - (desired_min_addr - ent_addr);
+
+               cvmx_bootmem_phy_set_next(new_ent_addr,
+                                         cvmx_bootmem_phy_get_next(ent_addr));
+               cvmx_bootmem_phy_set_size(new_ent_addr, new_ent_size);
+
+               /* Split out head room into a new free block */
+               ent_size -= new_ent_size;
+               cvmx_bootmem_phy_set_next(ent_addr, new_ent_addr);
+               cvmx_bootmem_phy_set_size(ent_addr, ent_size);
+
+               debug("%s: splitting head, addr %#llx size %#llx\n",
+                     __func__, CAST_ULL(ent_addr), CAST_ULL(ent_size));
+
+               /* Make the allocation target the current free block */
+               prev_addr = ent_addr;
+               ent_addr = new_ent_addr;
+               ent_size = new_ent_size;
+       }
+
+       /* Check for tail room */
+       if ((desired_min_addr + req_size) < (ent_addr + ent_size)) {
+               new_ent_addr = ent_addr + req_size;
+               new_ent_size = ent_size - req_size;
+
+               /* Create a new free block from tail room */
+               cvmx_bootmem_phy_set_next(new_ent_addr,
+                                         cvmx_bootmem_phy_get_next(ent_addr));
+               cvmx_bootmem_phy_set_size(new_ent_addr, new_ent_size);
+
+               debug("%s: splitting tail, addr %#llx size %#llx\n",
+                     __func__, CAST_ULL(new_ent_addr), CAST_ULL(new_ent_size));
+
+               /* Adjust the current block to exclude tail room */
+               ent_size = ent_size - new_ent_size;
+               cvmx_bootmem_phy_set_next(ent_addr, new_ent_addr);
+               cvmx_bootmem_phy_set_size(ent_addr, ent_size);
+       }
+
+       /* The current free block IS the allocation target */
+       if (desired_min_addr != ent_addr || ent_size != req_size)
+               debug("ERROR: %s: internal error - addr %#llx %#llx size %#llx %#llx\n",
+                     __func__, CAST_ULL(desired_min_addr), CAST_ULL(ent_addr),
+                     CAST_ULL(ent_size), CAST_ULL(req_size));
+
+       /* Remove the current free block from list */
+       if (prev_addr) {
+               cvmx_bootmem_phy_set_next(prev_addr,
+                                         cvmx_bootmem_phy_get_next(ent_addr));
+       } else {
+               /* head of list being returned, so update head ptr */
+               CVMX_BOOTMEM_DESC_SET_FIELD(head_addr,
+                                           cvmx_bootmem_phy_get_next(ent_addr));
+       }
+
+       __cvmx_bootmem_unlock(flags);
+       debug("%s: allocated size: %#llx, at addr: %#010llx\n",
+             __func__,
+             CAST_ULL(req_size),
+             CAST_ULL(desired_min_addr));
+
+       return desired_min_addr;
+
+error_out:
+       /* Requested memory not found or argument error */
+       __cvmx_bootmem_unlock(flags);
+       return -1;
+}
+
+int __cvmx_bootmem_phy_free(u64 phy_addr, u64 size, u32 flags)
+{
+       u64 cur_addr;
+       u64 prev_addr = 0;      /* zero is invalid */
+       int retval = 0;
+
+       debug("%s addr: %#llx, size: %#llx\n", __func__,
+             CAST_ULL(phy_addr), CAST_ULL(size));
+
+       if (__cvmx_bootmem_check_version(0))
+               return 0;
+
+       /* 0 is not a valid size for this allocator */
+       if (!size || !phy_addr)
+               return 0;
+
+       /* Round size up to a multiple of minimum alignment bytes */
+       size = (size + (CVMX_BOOTMEM_ALIGNMENT_SIZE - 1)) &
+               ~(CVMX_BOOTMEM_ALIGNMENT_SIZE - 1);
+
+       __cvmx_bootmem_lock(flags);
+       cur_addr = CVMX_BOOTMEM_DESC_GET_FIELD(head_addr);
+       if (cur_addr == 0 || phy_addr < cur_addr) {
+               /* add at front of list - special case with changing head ptr */
+               if (cur_addr && phy_addr + size > cur_addr)
+                       goto bootmem_free_done; /* error, overlapping section */
+               else if (phy_addr + size == cur_addr) {
+                       /* Add to front of existing first block */
+                       cvmx_bootmem_phy_set_next(phy_addr,
+                                                 cvmx_bootmem_phy_get_next(cur_addr));
+                       cvmx_bootmem_phy_set_size(phy_addr,
+                                                 cvmx_bootmem_phy_get_size(cur_addr) + size);
+                       CVMX_BOOTMEM_DESC_SET_FIELD(head_addr, phy_addr);
+
+               } else {
+                       /* New block before first block */
+                       /* OK if cur_addr is 0 */
+                       cvmx_bootmem_phy_set_next(phy_addr, cur_addr);
+                       cvmx_bootmem_phy_set_size(phy_addr, size);
+                       CVMX_BOOTMEM_DESC_SET_FIELD(head_addr, phy_addr);
+               }
+               retval = 1;
+               goto bootmem_free_done;
+       }
+
+       /* Find place in list to add block */
+       while (cur_addr && phy_addr > cur_addr) {
+               prev_addr = cur_addr;
+               cur_addr = cvmx_bootmem_phy_get_next(cur_addr);
+       }
+
+       if (!cur_addr) {
+               /*
+                * We have reached the end of the list, add on to end, checking
+                * to see if we need to combine with last block
+                */
+               if (prev_addr + cvmx_bootmem_phy_get_size(prev_addr) == phy_addr) {
+                       cvmx_bootmem_phy_set_size(prev_addr,
+                                                 cvmx_bootmem_phy_get_size(prev_addr) + size);
+               } else {
+                       cvmx_bootmem_phy_set_next(prev_addr, phy_addr);
+                       cvmx_bootmem_phy_set_size(phy_addr, size);
+                       cvmx_bootmem_phy_set_next(phy_addr, 0);
+               }
+               retval = 1;
+               goto bootmem_free_done;
+       } else {
+               /*
+                * insert between prev and cur nodes, checking for merge with
+                * either/both
+                */
+               if (prev_addr + cvmx_bootmem_phy_get_size(prev_addr) == phy_addr) {
+                       /* Merge with previous */
+                       cvmx_bootmem_phy_set_size(prev_addr,
+                                                 cvmx_bootmem_phy_get_size(prev_addr) + size);
+                       if (phy_addr + size == cur_addr) {
+                               /* Also merge with current */
+                               cvmx_bootmem_phy_set_size(prev_addr,
+                                                         cvmx_bootmem_phy_get_size(cur_addr) +
+                                                         cvmx_bootmem_phy_get_size(prev_addr));
+                               cvmx_bootmem_phy_set_next(prev_addr,
+                                                         cvmx_bootmem_phy_get_next(cur_addr));
+                       }
+                       retval = 1;
+                       goto bootmem_free_done;
+               } else if (phy_addr + size == cur_addr) {
+                       /* Merge with current */
+                       cvmx_bootmem_phy_set_size(phy_addr,
+                                                 cvmx_bootmem_phy_get_size(cur_addr) + size);
+                       cvmx_bootmem_phy_set_next(phy_addr,
+                                                 cvmx_bootmem_phy_get_next(cur_addr));
+                       cvmx_bootmem_phy_set_next(prev_addr, phy_addr);
+                       retval = 1;
+                       goto bootmem_free_done;
+               }
+
+               /* It is a standalone block, add in between prev and cur */
+               cvmx_bootmem_phy_set_size(phy_addr, size);
+               cvmx_bootmem_phy_set_next(phy_addr, cur_addr);
+               cvmx_bootmem_phy_set_next(prev_addr, phy_addr);
+       }
+       retval = 1;
+
+bootmem_free_done:
+       __cvmx_bootmem_unlock(flags);
+       return retval;
+}
+
+void cvmx_bootmem_phy_list_print(void)
+{
+       u64 addr;
+
+       addr = CVMX_BOOTMEM_DESC_GET_FIELD(head_addr);
+       printf("\n\n\nPrinting bootmem block list, descriptor: 0x%llx, head is 0x%llx\n",
+              CAST_ULL(cvmx_bootmem_desc_addr), CAST_ULL(addr));
+       printf("Descriptor version: %d.%d\n",
+              (int)CVMX_BOOTMEM_DESC_GET_FIELD(major_version),
+              (int)CVMX_BOOTMEM_DESC_GET_FIELD(minor_version));
+       if (CVMX_BOOTMEM_DESC_GET_FIELD(major_version) > 3)
+               debug("Warning: Bootmem descriptor version is newer than expected\n");
+
+       if (!addr)
+               printf("mem list is empty!\n");
+
+       while (addr) {
+               printf("Block address: 0x%08llx, size: 0x%08llx, next: 0x%08llx\n", CAST_ULL(addr),
+                      CAST_ULL(cvmx_bootmem_phy_get_size(addr)),
+                      CAST_ULL(cvmx_bootmem_phy_get_next(addr)));
+               addr = cvmx_bootmem_phy_get_next(addr);
+       }
+       printf("\n\n");
+}
+
+u64 cvmx_bootmem_phy_available_mem(u64 min_block_size)
+{
+       u64 addr;
+
+       u64 available_mem = 0;
+
+       __cvmx_bootmem_lock(0);
+       addr = CVMX_BOOTMEM_DESC_GET_FIELD(head_addr);
+       while (addr) {
+               if (cvmx_bootmem_phy_get_size(addr) >= min_block_size)
+                       available_mem += cvmx_bootmem_phy_get_size(addr);
+               addr = cvmx_bootmem_phy_get_next(addr);
+       }
+       __cvmx_bootmem_unlock(0);
+       return available_mem;
+}
+
+u64 cvmx_bootmem_phy_named_block_find(const char *name, u32 flags)
+{
+       u64 result = 0;
+
+       debug("%s: %s\n", __func__, name);
+
+       __cvmx_bootmem_lock(flags);
+       if (!__cvmx_bootmem_check_version(3)) {
+               int i;
+               u64 named_block_array_addr =
+                       CVMX_BOOTMEM_DESC_GET_FIELD(named_block_array_addr);
+               int num_blocks =
+                       CVMX_BOOTMEM_DESC_GET_FIELD(named_block_num_blocks);
+               int name_length =
+                       CVMX_BOOTMEM_DESC_GET_FIELD(named_block_name_len);
+               u64 named_addr = named_block_array_addr;
+
+               for (i = 0; i < num_blocks; i++) {
+                       u64 named_size =
+                               CVMX_BOOTMEM_NAMED_GET_FIELD(named_addr, size);
+                       if (name && named_size) {
+                               char name_tmp[name_length + 1];
+
+                               CVMX_BOOTMEM_NAMED_GET_NAME(named_addr,
+                                                           name_tmp,
+                                                           name_length);
+                               if (!strncmp(name, name_tmp, name_length)) {
+                                       result = named_addr;
+                                       break;
+                               }
+                       } else if (!name && !named_size) {
+                               result = named_addr;
+                               break;
+                       }
+
+                       named_addr +=
+                               sizeof(struct cvmx_bootmem_named_block_desc);
+               }
+       }
+       __cvmx_bootmem_unlock(flags);
+       return result;
+}
+
+int cvmx_bootmem_phy_named_block_free(const char *name, u32 flags)
+{
+       u64 named_block_addr;
+
+       if (__cvmx_bootmem_check_version(3))
+               return 0;
+
+       debug("%s: %s\n", __func__, name);
+
+       /*
+        * Take lock here, as name lookup/block free/name free need to be
+        * atomic
+        */
+       __cvmx_bootmem_lock(flags);
+
+       named_block_addr = cvmx_bootmem_phy_named_block_find(name,
+                                                            CVMX_BOOTMEM_FLAG_NO_LOCKING);
+       if (named_block_addr) {
+               u64 named_addr =
+                       CVMX_BOOTMEM_NAMED_GET_FIELD(named_block_addr,
+                                                    base_addr);
+               u64 named_size =
+                       CVMX_BOOTMEM_NAMED_GET_FIELD(named_block_addr, size);
+
+               debug("%s: %s, base: 0x%llx, size: 0x%llx\n",
+                     __func__, name, CAST_ULL(named_addr),
+                     CAST_ULL(named_size));
+
+               __cvmx_bootmem_phy_free(named_addr, named_size,
+                                       CVMX_BOOTMEM_FLAG_NO_LOCKING);
+
+               /* Set size to zero to indicate block not used. */
+               CVMX_BOOTMEM_NAMED_SET_FIELD(named_block_addr, size, 0);
+       }
+
+       __cvmx_bootmem_unlock(flags);
+       return !!named_block_addr;      /* 0 on failure, 1 on success */
+}
+
+s64 cvmx_bootmem_phy_named_block_alloc(u64 size, u64 min_addr,
+                                      u64 max_addr,
+                                      u64 alignment, const char *name,
+                                      u32 flags)
+{
+       s64 addr_allocated;
+       u64 named_block_desc_addr;
+
+       debug("%s: size: 0x%llx, min: 0x%llx, max: 0x%llx, align: 0x%llx, name: %s\n",
+             __func__, CAST_ULL(size), CAST_ULL(min_addr), CAST_ULL(max_addr),
+             CAST_ULL(alignment), name);
+
+       if (__cvmx_bootmem_check_version(3))
+               return -1;
+
+       /*
+        * Take lock here, as name lookup/block alloc/name add need to be
+        * atomic
+        */
+       __cvmx_bootmem_lock(flags);
+
+       named_block_desc_addr =
+               cvmx_bootmem_phy_named_block_find(name, flags |
+                                                 CVMX_BOOTMEM_FLAG_NO_LOCKING);
+       if (named_block_desc_addr) {
+               __cvmx_bootmem_unlock(flags);
+               return -1;
+       }
+
+       /* Get pointer to first available named block descriptor */
+       named_block_desc_addr =
+               cvmx_bootmem_phy_named_block_find(NULL, flags |
+                                                 CVMX_BOOTMEM_FLAG_NO_LOCKING);
+       if (!named_block_desc_addr) {
+               __cvmx_bootmem_unlock(flags);
+               return -1;
+       }
+
+       /*
+        * Round size up to a multiple of minimum alignment bytes.
+        * We need the actual size allocated to allow for blocks to be
+        * coalesced when they are freed.  The alloc routine does the
+        * same rounding up on all allocations.
+        */
+       size = (size + (CVMX_BOOTMEM_ALIGNMENT_SIZE - 1)) &
+               ~(CVMX_BOOTMEM_ALIGNMENT_SIZE - 1);
+
+       addr_allocated = cvmx_bootmem_phy_alloc(size, min_addr, max_addr,
+                                               alignment,
+                                               flags | CVMX_BOOTMEM_FLAG_NO_LOCKING);
+       if (addr_allocated >= 0) {
+               CVMX_BOOTMEM_NAMED_SET_FIELD(named_block_desc_addr, base_addr,
+                                            addr_allocated);
+               CVMX_BOOTMEM_NAMED_SET_FIELD(named_block_desc_addr, size, size);
+               CVMX_BOOTMEM_NAMED_SET_NAME(named_block_desc_addr, name,
+                                           CVMX_BOOTMEM_DESC_GET_FIELD(named_block_name_len));
+       }
+
+       __cvmx_bootmem_unlock(flags);
+       return addr_allocated;
+}
+
+void cvmx_bootmem_phy_named_block_print(void)
+{
+       int i;
+       int printed = 0;
+
+       u64 named_block_array_addr =
+               CVMX_BOOTMEM_DESC_GET_FIELD(named_block_array_addr);
+       int num_blocks = CVMX_BOOTMEM_DESC_GET_FIELD(named_block_num_blocks);
+       int name_length = CVMX_BOOTMEM_DESC_GET_FIELD(named_block_name_len);
+       u64 named_block_addr = named_block_array_addr;
+
+       debug("%s: desc addr: 0x%llx\n",
+             __func__, CAST_ULL(cvmx_bootmem_desc_addr));
+
+       if (__cvmx_bootmem_check_version(3))
+               return;
+
+       printf("List of currently allocated named bootmem blocks:\n");
+       for (i = 0; i < num_blocks; i++) {
+               u64 named_size =
+                       CVMX_BOOTMEM_NAMED_GET_FIELD(named_block_addr, size);
+               if (named_size) {
+                       char name_tmp[name_length + 1];
+                       u64 named_addr =
+                               CVMX_BOOTMEM_NAMED_GET_FIELD(named_block_addr,
+                                                            base_addr);
+                       CVMX_BOOTMEM_NAMED_GET_NAME(named_block_addr, name_tmp,
+                                                   name_length);
+                       printed++;
+                       printf("Name: %s, address: 0x%08llx, size: 0x%08llx, index: %d\n", name_tmp,
+                              CAST_ULL(named_addr),
+                              CAST_ULL(named_size), i);
+               }
+               named_block_addr +=
+                       sizeof(struct cvmx_bootmem_named_block_desc);
+       }
+
+       if (!printed)
+               printf("No named bootmem blocks exist.\n");
+}
+
+s64 cvmx_bootmem_phy_mem_list_init(u64 mem_size,
+                                  u32 low_reserved_bytes,
+                                  struct cvmx_bootmem_desc *desc_buffer)
+{
+       u64 cur_block_addr;
+       s64 addr;
+       int i;
+
+       debug("%s (arg desc ptr: %p, cvmx_bootmem_desc: 0x%llx)\n",
+             __func__, desc_buffer, CAST_ULL(cvmx_bootmem_desc_addr));
+
+       /*
+        * Descriptor buffer needs to be in 32 bit addressable space to be
+        * compatible with 32 bit applications
+        */
+       if (!desc_buffer) {
+               debug("ERROR: no memory for cvmx_bootmem descriptor provided\n");
+               return 0;
+       }
+
+       if (mem_size > OCTEON_MAX_PHY_MEM_SIZE) {
+               mem_size = OCTEON_MAX_PHY_MEM_SIZE;
+               debug("ERROR: requested memory size too large, truncating to maximum size\n");
+       }
+
+       if (cvmx_bootmem_desc_addr)
+               return 1;
+
+       /* Initialize cvmx pointer to descriptor */
+       cvmx_bootmem_init(cvmx_ptr_to_phys(desc_buffer));
+
+       /* Fill the bootmem descriptor */
+       CVMX_BOOTMEM_DESC_SET_FIELD(lock, 0);
+       CVMX_BOOTMEM_DESC_SET_FIELD(flags, 0);
+       CVMX_BOOTMEM_DESC_SET_FIELD(head_addr, 0);
+       CVMX_BOOTMEM_DESC_SET_FIELD(major_version, CVMX_BOOTMEM_DESC_MAJ_VER);
+       CVMX_BOOTMEM_DESC_SET_FIELD(minor_version, CVMX_BOOTMEM_DESC_MIN_VER);
+       CVMX_BOOTMEM_DESC_SET_FIELD(app_data_addr, 0);
+       CVMX_BOOTMEM_DESC_SET_FIELD(app_data_size, 0);
+
+       /*
+        * Set up global pointer to start of list, exclude low 64k for exception
+        * vectors, space for global descriptor
+        */
+       cur_block_addr = (OCTEON_DDR0_BASE + low_reserved_bytes);
+
+       if (mem_size <= OCTEON_DDR0_SIZE) {
+               __cvmx_bootmem_phy_free(cur_block_addr,
+                                       mem_size - low_reserved_bytes, 0);
+               goto frees_done;
+       }
+
+       __cvmx_bootmem_phy_free(cur_block_addr,
+                               OCTEON_DDR0_SIZE - low_reserved_bytes, 0);
+
+       mem_size -= OCTEON_DDR0_SIZE;
+
+       /* Add DDR2 block next if present */
+       if (mem_size > OCTEON_DDR1_SIZE) {
+               __cvmx_bootmem_phy_free(OCTEON_DDR1_BASE, OCTEON_DDR1_SIZE, 0);
+               __cvmx_bootmem_phy_free(OCTEON_DDR2_BASE,
+                                       mem_size - OCTEON_DDR1_SIZE, 0);
+       } else {
+               __cvmx_bootmem_phy_free(OCTEON_DDR1_BASE, mem_size, 0);
+       }
+frees_done:
+
+       /* Initialize the named block structure */
+       CVMX_BOOTMEM_DESC_SET_FIELD(named_block_name_len, CVMX_BOOTMEM_NAME_LEN);
+       CVMX_BOOTMEM_DESC_SET_FIELD(named_block_num_blocks,
+                                   CVMX_BOOTMEM_NUM_NAMED_BLOCKS);
+       CVMX_BOOTMEM_DESC_SET_FIELD(named_block_array_addr, 0);
+
+       /* Allocate this near the top of the low 256 MBytes of memory */
+       addr = cvmx_bootmem_phy_alloc(CVMX_BOOTMEM_NUM_NAMED_BLOCKS *
+                                     sizeof(struct cvmx_bootmem_named_block_desc),
+                                     0, 0x10000000, 0,
+                                     CVMX_BOOTMEM_FLAG_END_ALLOC);
+       if (addr >= 0)
+               CVMX_BOOTMEM_DESC_SET_FIELD(named_block_array_addr, addr);
+
+       debug("%s: named_block_array_addr: 0x%llx\n",
+             __func__, CAST_ULL(addr));
+
+       if (addr < 0) {
+               debug("FATAL ERROR: unable to allocate memory for named block array!\n");
+               return 0;
+       }
+
+       for (i = 0; i < CVMX_BOOTMEM_NUM_NAMED_BLOCKS; i++) {
+               CVMX_BOOTMEM_NAMED_SET_FIELD(addr, base_addr, 0);
+               CVMX_BOOTMEM_NAMED_SET_FIELD(addr, size, 0);
+               addr += sizeof(struct cvmx_bootmem_named_block_desc);
+       }
+
+       return 1;
+}
+
+s64 cvmx_bootmem_phy_mem_list_init_multi(u8 node_mask,
+                                        u32 mem_sizes[],
+                                        u32 low_reserved_bytes,
+                                        struct cvmx_bootmem_desc *desc_buffer)
+{
+       u64 cur_block_addr;
+       u64 mem_size;
+       s64 addr;
+       int i;
+       int node;
+       u64 node_base;  /* Make u64 to reduce type casting */
+
+       mem_sizes[0] = gd->ram_size / (1024 * 1024);
+
+       debug("%s (arg desc ptr: %p, cvmx_bootmem_desc: 0x%llx)\n",
+             __func__, desc_buffer, CAST_ULL(cvmx_bootmem_desc_addr));
+
+       /*
+        * Descriptor buffer needs to be in 32 bit addressable space to be
+        * compatible with 32 bit applications
+        */
+       if (!desc_buffer) {
+               debug("ERROR: no memory for cvmx_bootmem descriptor provided\n");
+               return 0;
+       }
+
+       cvmx_coremask_for_each_node(node, node_mask) {
+               if ((mem_sizes[node] * 1024 * 1024) > OCTEON_MAX_PHY_MEM_SIZE) {
+                       mem_sizes[node] = OCTEON_MAX_PHY_MEM_SIZE /
+                               (1024 * 1024);
+                       debug("ERROR node#%lld: requested memory size too large, truncating to maximum size\n",
+                             CAST_ULL(node));
+               }
+       }
+
+       if (cvmx_bootmem_desc_addr)
+               return 1;
+
+       /* Initialize cvmx pointer to descriptor */
+       cvmx_bootmem_init(cvmx_ptr_to_phys(desc_buffer));
+
+       /* Fill the bootmem descriptor */
+       CVMX_BOOTMEM_DESC_SET_FIELD(lock, 0);
+       CVMX_BOOTMEM_DESC_SET_FIELD(flags, 0);
+       CVMX_BOOTMEM_DESC_SET_FIELD(head_addr, 0);
+       CVMX_BOOTMEM_DESC_SET_FIELD(major_version, CVMX_BOOTMEM_DESC_MAJ_VER);
+       CVMX_BOOTMEM_DESC_SET_FIELD(minor_version, CVMX_BOOTMEM_DESC_MIN_VER);
+       CVMX_BOOTMEM_DESC_SET_FIELD(app_data_addr, 0);
+       CVMX_BOOTMEM_DESC_SET_FIELD(app_data_size, 0);
+
+       cvmx_coremask_for_each_node(node, node_mask) {
+               if (node != 0)  /* do not reserve memory on remote nodes */
+                       low_reserved_bytes = 0;
+
+               mem_size = (u64)mem_sizes[node] * (1024 * 1024); /* MBytes */
+
+               /*
+                * Set up global pointer to start of list, exclude low 64k
+                * for exception vectors, space for global descriptor
+                */
+
+               node_base = (u64)node << CVMX_NODE_MEM_SHIFT;
+               cur_block_addr = (OCTEON_DDR0_BASE + low_reserved_bytes) |
+                       node_base;
+
+               if (mem_size <= OCTEON_DDR0_SIZE) {
+                       __cvmx_bootmem_phy_free(cur_block_addr,
+                                               mem_size - low_reserved_bytes,
+                                               0);
+                       continue;
+               }
+
+               __cvmx_bootmem_phy_free(cur_block_addr,
+                                       OCTEON_DDR0_SIZE - low_reserved_bytes,
+                                       0);
+
+               mem_size -= OCTEON_DDR0_SIZE;
+
+               /* Add DDR2 block next if present */
+               if (mem_size > OCTEON_DDR1_SIZE) {
+                       __cvmx_bootmem_phy_free(OCTEON_DDR1_BASE |
+                                               node_base,
+                                               OCTEON_DDR1_SIZE, 0);
+                       __cvmx_bootmem_phy_free(OCTEON_DDR2_BASE |
+                                               node_base,
+                                               mem_size - OCTEON_DDR1_SIZE, 0);
+               } else {
+                       __cvmx_bootmem_phy_free(OCTEON_DDR1_BASE |
+                                               node_base,
+                                               mem_size, 0);
+               }
+       }
+
+       debug("%s: Initialize the named block\n", __func__);
+
+       /* Initialize the named block structure */
+       CVMX_BOOTMEM_DESC_SET_FIELD(named_block_name_len, CVMX_BOOTMEM_NAME_LEN);
+       CVMX_BOOTMEM_DESC_SET_FIELD(named_block_num_blocks,
+                                   CVMX_BOOTMEM_NUM_NAMED_BLOCKS);
+       CVMX_BOOTMEM_DESC_SET_FIELD(named_block_array_addr, 0);
+
+       /* Allocate this near the top of the low 256 MBytes of memory */
+       addr = cvmx_bootmem_phy_alloc(CVMX_BOOTMEM_NUM_NAMED_BLOCKS *
+                                     sizeof(struct cvmx_bootmem_named_block_desc),
+                                     0, 0x10000000, 0,
+                                     CVMX_BOOTMEM_FLAG_END_ALLOC);
+       if (addr >= 0)
+               CVMX_BOOTMEM_DESC_SET_FIELD(named_block_array_addr, addr);
+
+       debug("%s: named_block_array_addr: 0x%llx\n",
+             __func__, CAST_ULL(addr));
+
+       if (addr < 0) {
+               debug("FATAL ERROR: unable to allocate memory for named block array!\n");
+               return 0;
+       }
+
+       for (i = 0; i < CVMX_BOOTMEM_NUM_NAMED_BLOCKS; i++) {
+               CVMX_BOOTMEM_NAMED_SET_FIELD(addr, base_addr, 0);
+               CVMX_BOOTMEM_NAMED_SET_FIELD(addr, size, 0);
+               addr += sizeof(struct cvmx_bootmem_named_block_desc);
+       }
+
+       // TODO: consider guarding this debug output with an #ifdef DEBUG
+       cvmx_bootmem_phy_list_print();
+
+       return 1;
+}
+
+int cvmx_bootmem_reserve_memory(u64 start_addr, u64 size,
+                               const char *name, u32 flags)
+{
+       u64 addr;
+       int rc = 1;
+       static unsigned int block_num;
+       char block_name[CVMX_BOOTMEM_NAME_LEN];
+
+       debug("%s: start %#llx, size: %#llx, name: %s, flags:%#x)\n",
+             __func__, CAST_ULL(start_addr), CAST_ULL(size), name, flags);
+
+       if (__cvmx_bootmem_check_version(3))
+               return 0;
+
+       addr = CVMX_BOOTMEM_DESC_GET_FIELD(head_addr);
+       if (!addr)
+               return 0;
+
+       if (!name)
+               name = "__cvmx_bootmem_reserved";
+
+       while (addr && rc) {
+               u64 block_size = cvmx_bootmem_phy_get_size(addr);
+               u64 reserve_size = 0;
+
+               if (addr >= start_addr && addr < start_addr + size) {
+                       reserve_size = size - (addr - start_addr);
+                       if (block_size < reserve_size)
+                               reserve_size = block_size;
+               } else if (start_addr > addr &&
+                          start_addr < (addr + block_size)) {
+                       reserve_size = block_size - (start_addr - addr);
+               }
+
+               if (reserve_size) {
+                       snprintf(block_name, sizeof(block_name),
+                                "%.32s_%012llx_%u",
+                                name, (unsigned long long)start_addr,
+                                (unsigned int)block_num);
+
+                       debug("%s: Reserving 0x%llx bytes at address 0x%llx with name %s\n",
+                             __func__, CAST_ULL(reserve_size),
+                             CAST_ULL(addr), block_name);
+
+                       if (cvmx_bootmem_phy_named_block_alloc(reserve_size,
+                                                              addr, 0, 0,
+                                                              block_name,
+                                                              flags) == -1) {
+                               debug("%s: Failed to reserve 0x%llx bytes at address 0x%llx\n",
+                                     __func__, CAST_ULL(reserve_size),
+                                     (unsigned long long)addr);
+                               rc = 0;
+                               break;
+                       }
+
+                       debug("%s: Reserved 0x%llx bytes at address 0x%llx with name %s\n",
+                             __func__, CAST_ULL(reserve_size),
+                             CAST_ULL(addr), block_name);
+               }
+
+               addr = cvmx_bootmem_phy_get_next(addr);
+               block_num++;
+       }
+
+       return rc;
+}
+
+void cvmx_bootmem_lock(void)
+{
+       __cvmx_bootmem_lock(0);
+}
+
+void cvmx_bootmem_unlock(void)
+{
+       __cvmx_bootmem_unlock(0);
+}
+
+void *__cvmx_phys_addr_to_ptr(u64 phys, int size)
+{
+       void *tmp;
+
+       if (sizeof(void *) == 8) {
+               tmp = CASTPTR(void, CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS, phys));
+       } else {
+               u32 phy32 = (u32)(phys & 0x7fffffffULL);
+
+               tmp = CASTPTR(void, CVMX_ADD_SEG32(CVMX_MIPS32_SPACE_KSEG0,
+                                                  phy32));
+       }
+
+       return tmp;
+}
+
+void *__cvmx_bootmem_internal_get_desc_ptr(void)
+{
+       return cvmx_phys_to_ptr(cvmx_bootmem_desc_addr);
+}
diff --git a/arch/mips/mach-octeon/cvmx-coremask.c b/arch/mips/mach-octeon/cvmx-coremask.c
new file mode 100644 (file)
index 0000000..cff8c08
--- /dev/null
@@ -0,0 +1,366 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018-2020 Marvell International Ltd.
+ */
+
+#include <env.h>
+#include <errno.h>
+
+#include <linux/compat.h>
+#include <linux/ctype.h>
+
+#include <mach/cvmx-regs.h>
+#include <mach/cvmx-coremask.h>
+#include <mach/cvmx-fuse.h>
+#include <mach/octeon-model.h>
+#include <mach/octeon-feature.h>
+
+struct cvmx_coremask *get_coremask_override(struct cvmx_coremask *pcm)
+{
+       struct cvmx_coremask pcm_override = CVMX_COREMASK_MAX;
+       char *cptr;
+
+       /* The old code sets the number of cores to 16 in this case. */
+       cvmx_coremask_set_cores(pcm, 0, 16);
+
+       if (OCTEON_IS_OCTEON2() || OCTEON_IS_OCTEON3())
+               cvmx_coremask_copy(pcm, &pcm_override);
+
+       cptr = env_get("coremask_override");
+       if (cptr) {
+               if (cvmx_coremask_str2bmp(pcm, cptr) < 0)
+                       return NULL;
+       }
+
+       return pcm;
+}
+
+/* Validate the coremask that is passed to a boot* function. */
+int validate_coremask(struct cvmx_coremask *pcm)
+{
+       struct cvmx_coremask coremask_override;
+       struct cvmx_coremask fuse_coremask;
+
+       if (!get_coremask_override(&coremask_override))
+               return -1;
+
+       octeon_get_available_coremask(&fuse_coremask);
+
+       if (!cvmx_coremask_is_subset(&fuse_coremask, pcm)) {
+               puts("ERROR: Can't boot cores that don't exist!\n");
+               puts("Available coremask:\n");
+               cvmx_coremask_print(&fuse_coremask);
+               return -1;
+       }
+
+       if (!cvmx_coremask_is_subset(&coremask_override, pcm)) {
+               struct cvmx_coremask print_cm;
+
+               puts("Notice: coremask changed from:\n");
+               cvmx_coremask_print(pcm);
+               puts("based on coremask_override of:\n");
+               cvmx_coremask_print(&coremask_override);
+               cvmx_coremask_and(&print_cm, pcm, &coremask_override);
+               puts("to:\n");
+               cvmx_coremask_print(&print_cm);
+       }
+
+       return 0;
+}
+
+/**
+ * In CIU_FUSE for the 78XX, odd and even cores are separated out.
+ * For example, a CIU_FUSE value of 0xfffffefffffe (fuse bits 0 and 24
+ * cleared) indicates that physical cores 0 and 1 are not available.
+ * This function converts the bit number in the CIU_FUSE register to a
+ * physical core number.
+ */
+static int convert_ciu_fuse_to_physical_core(int core, int max_cores)
+{
+       if (!octeon_has_feature(OCTEON_FEATURE_CIU3))
+               return core;
+       else if (!OCTEON_IS_MODEL(OCTEON_CN78XX))
+               return core;
+       else if (core < (max_cores / 2))
+               return core * 2;
+       else
+               return ((core - (max_cores / 2)) * 2) + 1;
+}
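+
+/*
+ * For example, on CN78XX with max_cores = 48, CIU_FUSE bit 3 maps to
+ * physical core 6 (3 * 2), while CIU_FUSE bit 24 maps to physical
+ * core 1 ((24 - 24) * 2 + 1).
+ */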
+
+/**
+ * Count the number of core fuses blown for each tad.
+ *
+ * @param      coremask        fuse coremask
+ * @param[out] tad_blown_count number of cores blown for each tad
+ * @param      num_tads        number of tads
+ * @param      max_cores       maximum number of cores
+ *
+ * @return     void
+ */
+void fill_tad_corecount(u64 coremask, int tad_blown_count[], int num_tads,
+                       int max_cores)
+{
+       int core, physical_core;
+
+       for (core = 0; core < max_cores; core++) {
+               if (!(coremask & (1ULL << core))) {
+                       int tad;
+
+                       physical_core =
+                               convert_ciu_fuse_to_physical_core(core,
+                                                                 max_cores);
+                       tad = physical_core % num_tads;
+                       tad_blown_count[tad]++;
+               }
+       }
+}
+
+u64 get_core_pattern(int num_tads, int max_cores)
+{
+       u64 pattern = 1ULL;
+       int cnt;
+
+       for (cnt = 1; cnt < (max_cores / num_tads); cnt++)
+               pattern |= pattern << num_tads;
+
+       return pattern;
+}
+
+/**
+ * For CN78XX and CN68XX this function returns the logical coremask from the
+ * CIU_FUSE register value. For other models there is no difference.
+ *
+ * @param ciu_fuse_value       fuse value from CIU_FUSE register
+ * @return logical coremask of CIU_FUSE value.
+ */
+u64 get_logical_coremask(u64 ciu_fuse_value)
+{
+       int tad_blown_count[MAX_CORE_TADS] = {0};
+       int tad;
+       u64 logical_coremask = 0;
+       u64 tad_mask, pattern;
+       int num_tads, max_cores;
+
+       if (OCTEON_IS_MODEL(OCTEON_CN78XX)) {
+               num_tads = 8;
+               max_cores = 48;
+       } else if (OCTEON_IS_MODEL(OCTEON_CN73XX) ||
+                  OCTEON_IS_MODEL(OCTEON_CNF75XX)) {
+               num_tads = 4;
+               max_cores = 16;
+       } else if (OCTEON_IS_MODEL(OCTEON_CN68XX)) {
+               num_tads = 4;
+               max_cores = 32;
+       } else {
+               /* Most Octeon devices don't need any mapping. */
+               return ciu_fuse_value;
+       }
+
+       pattern = get_core_pattern(num_tads, max_cores);
+       fill_tad_corecount(ciu_fuse_value, tad_blown_count,
+                          num_tads, max_cores);
+
+       for (tad = 0; tad < num_tads; tad++) {
+               tad_mask = pattern << tad;
+               logical_coremask |= tad_mask >> (tad_blown_count[tad] * num_tads);
+       }
+       return logical_coremask;
+}
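+
+/*
+ * Worked example (illustrative): on CN73XX (num_tads = 4, max_cores = 16)
+ * the per-tad pattern is 0x1111.  If exactly one core fuse is blown in
+ * tad 0, that tad contributes 0x1111 >> 4 = 0x0111 to the logical
+ * coremask, i.e. only three of its four cores remain available.
+ */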
+
+/**
+ * Returns the available coremask either from env or fuses.
+ * If the fuses are blown and locked, they are the definitive coremask.
+ *
+ * @param pcm  pointer to coremask to fill in
+ * @return pointer to coremask
+ */
+struct cvmx_coremask *octeon_get_available_coremask(struct cvmx_coremask *pcm)
+{
+       u8 node_mask = 0x01;    /* ToDo: Currently only one node is supported */
+       u64 ciu_fuse;
+       u64 cores;
+
+       cvmx_coremask_clear_all(pcm);
+
+       if (octeon_has_feature(OCTEON_FEATURE_CIU3)) {
+               int node;
+
+               cvmx_coremask_for_each_node(node, node_mask) {
+                       ciu_fuse = (csr_rd(CVMX_CIU_FUSE) &
+                                   0x0000FFFFFFFFFFFFULL);
+
+                       ciu_fuse = get_logical_coremask(ciu_fuse);
+                       cvmx_coremask_set64_node(pcm, node, ciu_fuse);
+               }
+
+               return pcm;
+       }
+
+       ciu_fuse = (csr_rd(CVMX_CIU_FUSE) & 0x0000FFFFFFFFFFFFULL);
+       ciu_fuse = get_logical_coremask(ciu_fuse);
+
+       if (OCTEON_IS_MODEL(OCTEON_CN68XX))
+               cvmx_coremask_set64(pcm, ciu_fuse);
+
+       /* Get number of cores from fuse register, convert to coremask */
+       cores = __builtin_popcountll(ciu_fuse);
+
+       cvmx_coremask_set_cores(pcm, 0, cores);
+
+       return pcm;
+}
+
+int cvmx_coremask_str2bmp(struct cvmx_coremask *pcm, char *hexstr)
+{
+       int i, j;
+       int l;          /* length of the hexstr in characters */
+       int lb;         /* number of bits taken by hexstr */
+       int hldr_offset;/* holder's offset within the coremask */
+       int hldr_xsz;   /* holder's size in the number of hex digits */
+       u64 h;
+       char c;
+
+#define MINUS_ONE (hexstr[0] == '-' && hexstr[1] == '1' && hexstr[2] == 0)
+       if (MINUS_ONE) {
+               cvmx_coremask_set_all(pcm);
+               return 0;
+       }
+
+       /* Skip '0x' from hexstr */
+       if (hexstr[0] == '0' && (hexstr[1] == 'x' || hexstr[1] == 'X'))
+               hexstr += 2;
+
+       if (!strlen(hexstr)) {
+               printf("%s: Error: hex string is empty\n", __func__);
+               return -2;
+       }
+
+       /* Trim leading zeros */
+       while (*hexstr == '0')
+               hexstr++;
+
+       cvmx_coremask_clear_all(pcm);
+       l = strlen(hexstr);
+
+       /* If length is 0 then the hex string must be all zeros */
+       if (l == 0)
+               return 0;
+
+       for (i = 0; i < l; i++) {
+               if (isxdigit((int)hexstr[i]) == 0) {
+                       printf("%s: Non-hex digit within hexstr\n", __func__);
+                       return -2;
+               }
+       }
+
+       lb = (l - 1) * 4;
+       if (hexstr[0] > '7')
+               lb += 4;
+       else if (hexstr[0] > '3')
+               lb += 3;
+       else if (hexstr[0] > '1')
+               lb += 2;
+       else
+               lb += 1;
+       if (lb > CVMX_MIPS_MAX_CORES) {
+               printf("%s: hexstr (%s) is too long\n", __func__, hexstr);
+               return -1;
+       }
+
+       hldr_offset = 0;
+       hldr_xsz = 2 * sizeof(u64);
+       for (i = l; i > 0; i -= hldr_xsz) {
+               c = hexstr[i];
+               hexstr[i] = 0;
+               j = i - hldr_xsz;
+               if (j < 0)
+                       j = 0;
+               h = simple_strtoull(&hexstr[j], NULL, 16);
+               if (errno == EINVAL) {
+                       printf("%s: simple_strtoull returned EINVAL\n", __func__);
+                       return -2;
+               }
+               pcm->coremask_bitmap[hldr_offset] = h;
+               hexstr[i] = c;
+               hldr_offset++;
+       }
+
+       return 0;
+}
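+
+/*
+ * A minimal usage sketch (hypothetical input): the string "0x3" sets
+ * coremask_bitmap[0] to 0x3, i.e. cores 0 and 1, while "-1" selects all
+ * cores via cvmx_coremask_set_all().
+ */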
+
+void cvmx_coremask_print(const struct cvmx_coremask *pcm)
+{
+       int i, j;
+       int start;
+       int found = 0;
+
+       /*
+        * Print one node per line. Since the bitmap is stored LSB to MSB
+        * we reverse the order when printing.
+        */
+       if (!octeon_has_feature(OCTEON_FEATURE_MULTINODE)) {
+               start = 0;
+               for (j = CVMX_COREMASK_MAX_CORES_PER_NODE -
+                            CVMX_COREMASK_HLDRSZ;
+                    j >= 0; j -= CVMX_COREMASK_HLDRSZ) {
+                       if (pcm->coremask_bitmap[j / CVMX_COREMASK_HLDRSZ] != 0)
+                               start = 1;
+                       if (start) {
+                               printf(" 0x%llx",
+                                      (u64)pcm->coremask_bitmap[j /
+                                               CVMX_COREMASK_HLDRSZ]);
+                       }
+               }
+
+               if (start)
+                       found = 1;
+
+               /*
+                * If the coremask is empty print <EMPTY> so it is not
+                * confusing
+                */
+               if (!found)
+                       printf("<EMPTY>");
+               printf("\n");
+
+               return;
+       }
+
+       for (i = 0; i < CVMX_MAX_USED_CORES_BMP;
+            i += CVMX_COREMASK_MAX_CORES_PER_NODE) {
+               printf("%s  node %d:", i > 0 ? "\n" : "",
+                      cvmx_coremask_core_to_node(i));
+               start = 0;
+
+               for (j = i + CVMX_COREMASK_MAX_CORES_PER_NODE -
+                            CVMX_COREMASK_HLDRSZ;
+                    j >= i;
+                    j -= CVMX_COREMASK_HLDRSZ) {
+                       /* Don't start printing until we get a non-zero word. */
+                       if (pcm->coremask_bitmap[j / CVMX_COREMASK_HLDRSZ] != 0)
+                               start = 1;
+
+                       if (start) {
+                               printf(" 0x%llx", (u64)pcm->coremask_bitmap[j /
+                                                       CVMX_COREMASK_HLDRSZ]);
+                       }
+               }
+
+               if (start)
+                       found = 1;
+       }
+
+       i /= CVMX_COREMASK_HLDRSZ;
+       for (; i < CVMX_COREMASK_BMPSZ; i++) {
+               if (pcm->coremask_bitmap[i]) {
+                       printf("  EXTRA GARBAGE[%i]: %016llx\n", i,
+                              (u64)pcm->coremask_bitmap[i]);
+               }
+       }
+
+       /* If the coremask is empty print <EMPTY> so it is not confusing */
+       if (!found)
+               printf("<EMPTY>");
+
+       printf("\n");
+}
index ff7a59f..6dc08e1 100644 (file)
@@ -1,28 +1,84 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
- * Copyright (C) Stefan Roese <sr@denx.de>
+ * Copyright (C) 2020 Stefan Roese <sr@denx.de>
  */
 
+#include <config.h>
 #include <dm.h>
 #include <ram.h>
 #include <asm/global_data.h>
 #include <linux/compat.h>
+#include <display_options.h>
 
 DECLARE_GLOBAL_DATA_PTR;
 
+#define UBOOT_RAM_SIZE_MAX     0x10000000ULL
+
 int dram_init(void)
 {
-       /*
-        * No DDR init yet -> run in L2 cache
-        */
-       gd->ram_size = (4 << 20);
-       gd->bd->bi_dram[0].size = gd->ram_size;
-       gd->bd->bi_dram[1].size = 0;
+       if (IS_ENABLED(CONFIG_RAM_OCTEON)) {
+               struct ram_info ram;
+               struct udevice *dev;
+               int ret;
+
+               ret = uclass_get_device(UCLASS_RAM, 0, &dev);
+               if (ret) {
+                       debug("DRAM init failed: %d\n", ret);
+                       return ret;
+               }
+
+               ret = ram_get_info(dev, &ram);
+               if (ret) {
+                       debug("Cannot get DRAM size: %d\n", ret);
+                       return ret;
+               }
+
+               gd->ram_size = min_t(size_t, ram.size, UBOOT_RAM_SIZE_MAX);
+               debug("SDRAM base=%lx, size=%lx\n",
+                     (unsigned long)ram.base, (unsigned long)ram.size);
+       } else {
+               /*
+                * No DDR init yet -> run in L2 cache
+                */
+               gd->ram_size = (4 << 20);
+               gd->bd->bi_dram[0].size = gd->ram_size;
+               gd->bd->bi_dram[1].size = 0;
+       }
 
        return 0;
 }
 
+void board_add_ram_info(int use_default)
+{
+       if (IS_ENABLED(CONFIG_RAM_OCTEON)) {
+               struct ram_info ram;
+               struct udevice *dev;
+               int ret;
+
+               ret = uclass_get_device(UCLASS_RAM, 0, &dev);
+               if (ret) {
+                       debug("DRAM init failed: %d\n", ret);
+                       return;
+               }
+
+               ret = ram_get_info(dev, &ram);
+               if (ret) {
+                       debug("Cannot get DRAM size: %d\n", ret);
+                       return;
+               }
+
+               printf(" (");
+               print_size(ram.size, " total)");
+       }
+}
+
 ulong board_get_usable_ram_top(ulong total_size)
 {
-       return gd->ram_top;
+       if (IS_ENABLED(CONFIG_RAM_OCTEON)) {
+               /* Map a maximum of 256MiB - return an address, not a size */
+               return CONFIG_SYS_SDRAM_BASE + min(gd->ram_size,
+                                                  UBOOT_RAM_SIZE_MAX);
+       } else {
+               return gd->ram_top;
+       }
 }
diff --git a/arch/mips/mach-octeon/include/mach/bootoct_cmd.h b/arch/mips/mach-octeon/include/mach/bootoct_cmd.h
new file mode 100644 (file)
index 0000000..657698b
--- /dev/null
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#ifndef __BOOTOCT_CMD_H__
+#define __BOOTOCT_CMD_H__
+
+#include "cvmx-coremask.h"
+
+enum octeon_boot_cmd_type {
+       BOOTOCT,
+       BOOTOCTLINUX,
+       BOOTOCTELF
+};
+
+/** Structure to contain results of command line argument parsing */
+struct octeon_boot_args {
+       struct cvmx_coremask coremask;  /** Parsed coremask */
+       int num_cores[CVMX_MAX_NODES];  /** number of cores */
+       int num_skipped[CVMX_MAX_NODES];/** number of skipped cores */
+       const char *app_name;           /** Application name */
+       const char *named_block;        /** Named block to load Linux into */
+       u32 stack_size;                 /** stack size */
+       u32 heap_size;                  /** heap size */
+       u32 boot_flags;                 /** boot flags */
+       int node_mask;                  /** Node mask to use */
+       int console_uart;               /** serial console number */
+       bool forceboot;                 /** force booting if core 0 not set */
+       bool coremask_set;              /** set if coremask was set */
+       bool num_cores_set;             /** Set if num_cores was set */
+       bool num_skipped_set;           /** Set if num_skipped was set */
+       /** Set if endbootargs parameter was passed. */
+       bool endbootargs;
+};
+
+/**
+ * Parse command line arguments
+ *
+ * @param argc                 number of arguments
+ * @param[in] argv             array of argument strings
+ * @param cmd                  command type
+ * @param[out] boot_args       parsed values
+ *
+ * @return number of arguments parsed
+ */
+int octeon_parse_bootopts(int argc, char *const argv[],
+                         enum octeon_boot_cmd_type cmd,
+                         struct octeon_boot_args *boot_args);
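+
+/*
+ * Minimal usage sketch (illustrative only, not taken from an existing
+ * caller):
+ *
+ *   struct octeon_boot_args boot_args;
+ *   int used = octeon_parse_bootopts(argc, argv, BOOTOCTLINUX, &boot_args);
+ *
+ *   if (boot_args.coremask_set)
+ *           cvmx_coremask_print(&boot_args.coremask);
+ */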
+
+void nmi_bootvector(void);
+extern u64 nmi_handler_para[];
+
+#endif /* __BOOTOCT_CMD_H__ */
diff --git a/arch/mips/mach-octeon/include/mach/cvmx-bootinfo.h b/arch/mips/mach-octeon/include/mach/cvmx-bootinfo.h
new file mode 100644 (file)
index 0000000..3379871
--- /dev/null
@@ -0,0 +1,350 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+/*
+ * Header file containing the ABI with the bootloader.
+ */
+
+#ifndef __CVMX_BOOTINFO_H__
+#define __CVMX_BOOTINFO_H__
+
+#include "cvmx-coremask.h"
+
+/*
+ * Current major and minor versions of the CVMX bootinfo block that is
+ * passed from the bootloader to the application.  This is versioned
+ * so that applications can properly handle multiple bootloader
+ * versions.
+ */
+#define CVMX_BOOTINFO_MAJ_VER 1
+#define CVMX_BOOTINFO_MIN_VER 4
+
+#if (CVMX_BOOTINFO_MAJ_VER == 1)
+#define CVMX_BOOTINFO_OCTEON_SERIAL_LEN 20
+/*
+ * This structure is populated by the bootloader.  For binary
+ * compatibility the only changes that should be made are
+ * adding members to the end of the structure, and the minor
+ * version should be incremented at that time.
+ * If an incompatible change is made, the major version
+ * must be incremented, and the minor version should be reset
+ * to 0.
+ */
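+/*
+ * A consumer would typically check compatibility along these lines
+ * (sketch only; use_coremask() stands in for whatever the application
+ * does with the extended coremask):
+ *
+ *   if (info->major_version != CVMX_BOOTINFO_MAJ_VER)
+ *           return -1;
+ *   if (info->minor_version >= 4)
+ *           use_coremask(&info->ext_core_mask);
+ *
+ * ext_core_mask was added with minor version 4 and replaces core_mask on
+ * parts with more than 32 cores.
+ */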
+struct cvmx_bootinfo {
+       u32 major_version;
+       u32 minor_version;
+
+       u64 stack_top;
+       u64 heap_base;
+       u64 heap_end;
+       u64 desc_vaddr;
+
+       u32 exception_base_addr;
+       u32 stack_size;
+       u32 flags;
+       u32 core_mask;
+       /* DRAM size in megabytes */
+       u32 dram_size;
+       /* physical address of free memory descriptor block*/
+       u32 phy_mem_desc_addr;
+       /* used to pass flags from app to debugger */
+       u32 debugger_flags_base_addr;
+
+       /* CPU clock speed, in hz */
+       u32 eclock_hz;
+
+       /* DRAM clock speed, in hz */
+       u32 dclock_hz;
+
+       u32 reserved0;
+       u16 board_type;
+       u8 board_rev_major;
+       u8 board_rev_minor;
+       u16 reserved1;
+       u8 reserved2;
+       u8 reserved3;
+       char board_serial_number[CVMX_BOOTINFO_OCTEON_SERIAL_LEN];
+       u8 mac_addr_base[6];
+       u8 mac_addr_count;
+#if (CVMX_BOOTINFO_MIN_VER >= 1)
+       /*
+        * Several boards support compact flash on the Octeon boot
+        * bus.  The CF memory spaces may be mapped to different
+        * addresses on different boards.  These are the physical
+        * addresses, so care must be taken to use the correct
+        * XKPHYS/KSEG0 addressing depending on the application's
+        * ABI.  These values will be 0 if CF is not present.
+        */
+       u64 compact_flash_common_base_addr;
+       u64 compact_flash_attribute_base_addr;
+       /*
+        * Base address of the LED display (as on EBT3000 board)
+        * This will be 0 if LED display not present.
+        */
+       u64 led_display_base_addr;
+#endif
+#if (CVMX_BOOTINFO_MIN_VER >= 2)
+       /* DFA reference clock in hz (if applicable)*/
+       u32 dfa_ref_clock_hz;
+
+       /*
+        * flags indicating various configuration options.  These
+        * flags supersede the 'flags' variable and should be used
+        * instead if available.
+        */
+       u32 config_flags;
+#endif
+#if (CVMX_BOOTINFO_MIN_VER >= 3)
+       /*
+        * Address of the OF Flattened Device Tree structure
+        * describing the board.
+        */
+       u64 fdt_addr;
+#endif
+#if (CVMX_BOOTINFO_MIN_VER >= 4)
+       /*
+        * Coremask used for processors with more than 32 cores
+        * or with OCI.  This replaces core_mask.
+        */
+       struct cvmx_coremask ext_core_mask;
+#endif
+};
+
+#define CVMX_BOOTINFO_CFG_FLAG_PCI_HOST                        (1ull << 0)
+#define CVMX_BOOTINFO_CFG_FLAG_PCI_TARGET              (1ull << 1)
+#define CVMX_BOOTINFO_CFG_FLAG_DEBUG                   (1ull << 2)
+#define CVMX_BOOTINFO_CFG_FLAG_NO_MAGIC                        (1ull << 3)
+/*
+ * This flag is set if the TLB mappings are not contained in the
+ * 0x10000000 - 0x20000000 boot bus region.
+ */
+#define CVMX_BOOTINFO_CFG_FLAG_OVERSIZE_TLB_MAPPING    (1ull << 4)
+#define CVMX_BOOTINFO_CFG_FLAG_BREAK                   (1ull << 5)
+
+#endif /*   (CVMX_BOOTINFO_MAJ_VER == 1) */
+
+/* Type defines for board and chip types */
+enum cvmx_board_types_enum {
+       CVMX_BOARD_TYPE_NULL = 0,
+       CVMX_BOARD_TYPE_SIM = 1,
+       CVMX_BOARD_TYPE_EBT3000 = 2,
+       CVMX_BOARD_TYPE_KODAMA = 3,
+       CVMX_BOARD_TYPE_NIAGARA = 4,
+       CVMX_BOARD_TYPE_NAC38 = 5,      /* formerly NAO38 */
+       CVMX_BOARD_TYPE_THUNDER = 6,
+       CVMX_BOARD_TYPE_TRANTOR = 7,
+       CVMX_BOARD_TYPE_EBH3000 = 8,
+       CVMX_BOARD_TYPE_EBH3100 = 9,
+       CVMX_BOARD_TYPE_HIKARI = 10,
+       CVMX_BOARD_TYPE_CN3010_EVB_HS5 = 11,
+       CVMX_BOARD_TYPE_CN3005_EVB_HS5 = 12,
+       CVMX_BOARD_TYPE_KBP = 13,
+       /* Deprecated, CVMX_BOARD_TYPE_CN3010_EVB_HS5 supports the CN3020 */
+       CVMX_BOARD_TYPE_CN3020_EVB_HS5 = 14,
+       CVMX_BOARD_TYPE_EBT5800 = 15,
+       CVMX_BOARD_TYPE_NICPRO2 = 16,
+       CVMX_BOARD_TYPE_EBH5600 = 17,
+       CVMX_BOARD_TYPE_EBH5601 = 18,
+       CVMX_BOARD_TYPE_EBH5200 = 19,
+       CVMX_BOARD_TYPE_BBGW_REF = 20,
+       CVMX_BOARD_TYPE_NIC_XLE_4G = 21,
+       CVMX_BOARD_TYPE_EBT5600 = 22,
+       CVMX_BOARD_TYPE_EBH5201 = 23,
+       CVMX_BOARD_TYPE_EBT5200 = 24,
+       CVMX_BOARD_TYPE_CB5600  = 25,
+       CVMX_BOARD_TYPE_CB5601  = 26,
+       CVMX_BOARD_TYPE_CB5200  = 27,
+       /* Special 'generic' board type, supports many boards */
+       CVMX_BOARD_TYPE_GENERIC = 28,
+       CVMX_BOARD_TYPE_EBH5610 = 29,
+       CVMX_BOARD_TYPE_LANAI2_A = 30,
+       CVMX_BOARD_TYPE_LANAI2_U = 31,
+       CVMX_BOARD_TYPE_EBB5600 = 32,
+       CVMX_BOARD_TYPE_EBB6300 = 33,
+       CVMX_BOARD_TYPE_NIC_XLE_10G = 34,
+       CVMX_BOARD_TYPE_LANAI2_G = 35,
+       CVMX_BOARD_TYPE_EBT5810 = 36,
+       CVMX_BOARD_TYPE_NIC10E = 37,
+       CVMX_BOARD_TYPE_EP6300C = 38,
+       CVMX_BOARD_TYPE_EBB6800 = 39,
+       CVMX_BOARD_TYPE_NIC4E = 40,
+       CVMX_BOARD_TYPE_NIC2E = 41,
+       CVMX_BOARD_TYPE_EBB6600 = 42,
+       CVMX_BOARD_TYPE_REDWING = 43,
+       CVMX_BOARD_TYPE_NIC68_4 = 44,
+       CVMX_BOARD_TYPE_NIC10E_66 = 45,
+       CVMX_BOARD_TYPE_MAX,
+
+       /*
+        * The range from CVMX_BOARD_TYPE_MAX to
+        * CVMX_BOARD_TYPE_CUST_DEFINED_MIN is reserved for future
+        * SDK use.
+        */
+
+       /*
+        * Set aside a range for customer boards.  These numbers are managed
+        * by Cavium.
+        */
+       CVMX_BOARD_TYPE_CUST_DEFINED_MIN = 10000,
+       CVMX_BOARD_TYPE_CUST_WSX16 = 10001,
+       CVMX_BOARD_TYPE_CUST_NS0216 = 10002,
+       CVMX_BOARD_TYPE_CUST_NB5 = 10003,
+       CVMX_BOARD_TYPE_CUST_WMR500 = 10004,
+       CVMX_BOARD_TYPE_CUST_ITB101 = 10005,
+       CVMX_BOARD_TYPE_CUST_NTE102 = 10006,
+       CVMX_BOARD_TYPE_CUST_AGS103 = 10007,
+       CVMX_BOARD_TYPE_CUST_GST104 = 10008,
+       CVMX_BOARD_TYPE_CUST_GCT105 = 10009,
+       CVMX_BOARD_TYPE_CUST_AGS106 = 10010,
+       CVMX_BOARD_TYPE_CUST_SGM107 = 10011,
+       CVMX_BOARD_TYPE_CUST_GCT108 = 10012,
+       CVMX_BOARD_TYPE_CUST_AGS109 = 10013,
+       CVMX_BOARD_TYPE_CUST_GCT110 = 10014,
+       CVMX_BOARD_TYPE_CUST_L2_AIR_SENDER = 10015,
+       CVMX_BOARD_TYPE_CUST_L2_AIR_RECEIVER = 10016,
+       CVMX_BOARD_TYPE_CUST_L2_ACCTON2_TX = 10017,
+       CVMX_BOARD_TYPE_CUST_L2_ACCTON2_RX = 10018,
+       CVMX_BOARD_TYPE_CUST_L2_WSTRNSNIC_TX = 10019,
+       CVMX_BOARD_TYPE_CUST_L2_WSTRNSNIC_RX = 10020,
+       CVMX_BOARD_TYPE_CUST_L2_ZINWELL = 10021,
+       CVMX_BOARD_TYPE_CUST_DEFINED_MAX = 20000,
+
+       /*
+        * Set aside a range for customer private use.  The SDK won't
+        * use any numbers in this range.
+        */
+       CVMX_BOARD_TYPE_CUST_PRIVATE_MIN = 20001,
+       CVMX_BOARD_TYPE_UBNT_E100 = 20002,
+       CVMX_BOARD_TYPE_CUST_DSR1000N = 20006,
+       CVMX_BOARD_TYPE_KONTRON_S1901 = 21901,
+       CVMX_BOARD_TYPE_CUST_PRIVATE_MAX = 30000,
+
+       /* The remaining range is reserved for future use. */
+};
+
+enum cvmx_chip_types_enum {
+       CVMX_CHIP_TYPE_NULL = 0,
+       CVMX_CHIP_SIM_TYPE_DEPRECATED = 1,
+       CVMX_CHIP_TYPE_OCTEON_SAMPLE = 2,
+       CVMX_CHIP_TYPE_MAX,
+};
+
+/*
+ * Compatibility alias for NAC38 name change, planned to be removed
+ * from SDK 1.7
+ */
+#define CVMX_BOARD_TYPE_NAO38  CVMX_BOARD_TYPE_NAC38
+
+/* Functions to return string based on type */
+#define ENUM_BRD_TYPE_CASE(x)                                          \
+       case x:                                                         \
+               return(#x + 16)         /* Skip CVMX_BOARD_TYPE_ */
+
+static inline const char *cvmx_board_type_to_string(enum
+                                                   cvmx_board_types_enum type)
+{
+       switch (type) {
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NULL);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_SIM);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBT3000);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_KODAMA);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIAGARA);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NAC38);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_THUNDER);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_TRANTOR);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH3000);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH3100);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_HIKARI);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CN3010_EVB_HS5);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CN3005_EVB_HS5);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_KBP);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CN3020_EVB_HS5);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBT5800);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NICPRO2);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH5600);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH5601);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH5200);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_BBGW_REF);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC_XLE_4G);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBT5600);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH5201);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBT5200);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CB5600);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CB5601);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CB5200);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_GENERIC);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH5610);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_LANAI2_A);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_LANAI2_U);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBB5600);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBB6300);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC_XLE_10G);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_LANAI2_G);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBT5810);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC10E);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EP6300C);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBB6800);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC4E);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC2E);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBB6600);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_REDWING);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC68_4);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC10E_66);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_MAX);
+
+               /* Customer boards listed here */
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_DEFINED_MIN);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_WSX16);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_NS0216);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_NB5);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_WMR500);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_ITB101);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_NTE102);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_AGS103);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_GST104);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_GCT105);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_AGS106);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_SGM107);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_GCT108);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_AGS109);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_GCT110);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_AIR_SENDER);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_AIR_RECEIVER);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_ACCTON2_TX);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_ACCTON2_RX);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_WSTRNSNIC_TX);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_WSTRNSNIC_RX);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_ZINWELL);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_DEFINED_MAX);
+
+               /* Customer private range */
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_PRIVATE_MIN);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_UBNT_E100);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_DSR1000N);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_KONTRON_S1901);
+               ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_PRIVATE_MAX);
+       }
+
+       return NULL;
+}
+
+#define ENUM_CHIP_TYPE_CASE(x)                                         \
+       case x:                                                         \
+               return(#x + 15)         /* Skip CVMX_CHIP_TYPE_ */
+
+static inline const char *cvmx_chip_type_to_string(enum
+                                                  cvmx_chip_types_enum type)
+{
+       switch (type) {
+               ENUM_CHIP_TYPE_CASE(CVMX_CHIP_TYPE_NULL);
+               ENUM_CHIP_TYPE_CASE(CVMX_CHIP_SIM_TYPE_DEPRECATED);
+               ENUM_CHIP_TYPE_CASE(CVMX_CHIP_TYPE_OCTEON_SAMPLE);
+               ENUM_CHIP_TYPE_CASE(CVMX_CHIP_TYPE_MAX);
+       }
+
+       return "Unsupported Chip";
+}
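As an illustration of the prefix-skipping trick used by ENUM_BRD_TYPE_CASE and ENUM_CHIP_TYPE_CASE (a caller sketch, assuming nothing beyond the declarations above):

    const char *name = cvmx_board_type_to_string(CVMX_BOARD_TYPE_GENERIC);
    /* The macro stringizes the enumerator and adds 16, skipping the
     * 16-character "CVMX_BOARD_TYPE_" prefix, so name points at "GENERIC".
     */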
+
+#endif /* __CVMX_BOOTINFO_H__ */
diff --git a/arch/mips/mach-octeon/include/mach/cvmx-bootmem.h b/arch/mips/mach-octeon/include/mach/cvmx-bootmem.h
new file mode 100644 (file)
index 0000000..d60668c
--- /dev/null
@@ -0,0 +1,533 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+/**
+ * @file
+ * Simple allocate-only memory allocator. Used to allocate memory at application
+ * start time.
+ */
+
+#ifndef __CVMX_BOOTMEM_H__
+#define __CVMX_BOOTMEM_H__
+
+/* Must be multiple of 8, changing breaks ABI */
+#define CVMX_BOOTMEM_NAME_LEN          128
+/* Can change without breaking ABI */
+#define CVMX_BOOTMEM_NUM_NAMED_BLOCKS  64
+/* minimum alignment of bootmem allocated blocks */
+#define CVMX_BOOTMEM_ALIGNMENT_SIZE    (16ull)
+
+/* Flags for cvmx_bootmem_phy_mem* functions */
+/* Allocate from end of block instead of beginning */
+#define CVMX_BOOTMEM_FLAG_END_ALLOC    (1 << 0)
+#define CVMX_BOOTMEM_FLAG_NO_LOCKING   (1 << 1) /* Don't do any locking. */
+
+/* Real physical addresses of memory regions */
+#define OCTEON_DDR0_BASE    (0x0ULL)
+#define OCTEON_DDR0_SIZE    (0x010000000ULL)
+#define OCTEON_DDR1_BASE    ((OCTEON_IS_OCTEON2() || OCTEON_IS_OCTEON3()) \
+                            ? 0x20000000ULL : 0x410000000ULL)
+#define OCTEON_DDR1_SIZE    (0x010000000ULL)
+#define OCTEON_DDR2_BASE    ((OCTEON_IS_OCTEON2() || OCTEON_IS_OCTEON3()) \
+                            ? 0x30000000ULL : 0x20000000ULL)
+#define OCTEON_DDR2_SIZE    ((OCTEON_IS_OCTEON2() || OCTEON_IS_OCTEON3()) \
+                            ? 0x7d0000000ULL : 0x3e0000000ULL)
+#define OCTEON_MAX_PHY_MEM_SIZE ((OCTEON_IS_MODEL(OCTEON_CN68XX))      \
+                                ? 128 * 1024 * 1024 * 1024ULL          \
+                                : (OCTEON_IS_OCTEON2())                \
+                                ? 32 * 1024 * 1024 * 1024ull           \
+                                : (OCTEON_IS_OCTEON3())                \
+                                ? 512 * 1024 * 1024 * 1024ULL          \
+                                : 16 * 1024 * 1024 * 1024ULL)
+
+/*
+ * First bytes of each free physical block of memory contain this structure,
+ * which is used to maintain the free memory list.  Since the bootloader is
+ * only 32 bits, there is a union providing 64 and 32 bit versions.  The
+ * application init code converts addresses to 64 bit addresses before the
+ * application starts.
+ */
+struct cvmx_bootmem_block_header {
+       /* Note: these are referenced from assembly routines in the bootloader,
+        * so this structure should not be changed without changing those
+        * routines as well.
+        */
+       u64 next_block_addr;
+       u64 size;
+
+};
+
+/*
+ * Structure for named memory blocks.
+ * The number of descriptors available can be changed without affecting
+ * compatibility, but name length changes require a bump in the bootmem
+ * descriptor version.
+ * Note: This structure must be naturally 64 bit aligned, as a single
+ * memory image will be used by both 32 and 64 bit programs.
+ */
+struct cvmx_bootmem_named_block_desc {
+       u64 base_addr;  /* Base address of named block */
+       /*
+        * Size actually allocated for named block (may differ from requested)
+        */
+       u64 size;
+       char name[CVMX_BOOTMEM_NAME_LEN]; /* name of named block */
+};
+
+/* Current descriptor versions */
+/* CVMX bootmem descriptor major version */
+#define CVMX_BOOTMEM_DESC_MAJ_VER      3
+/* CVMX bootmem descriptor minor version */
+#define CVMX_BOOTMEM_DESC_MIN_VER      0
+
+/*
+ * First three members of cvmx_bootmem_desc_t are left in original
+ * positions for backwards compatibility.
+ */
+struct cvmx_bootmem_desc {
+       /* Linux compatible proxy for __BIG_ENDIAN */
+       u32 lock;       /* spinlock to control access to list */
+       u32 flags;      /* flags for indicating various conditions */
+       u64 head_addr;
+
+       /* incremented when incompatible changes are made */
+       u32 major_version;
+       /*
+        * incremented when compatible changes are made; reset to
+        * zero when the major version is incremented
+        */
+       u32 minor_version;
+       u64 app_data_addr;
+       u64 app_data_size;
+
+       /* number of elements in named blocks array */
+       u32 named_block_num_blocks;
+       /* length of name array in bootmem blocks */
+       u32 named_block_name_len;
+       /* address of named memory block descriptors */
+       u64 named_block_array_addr;
+};
+
+/**
+ * Initialize the boot alloc memory structures. This is
+ * normally called inside of cvmx_user_app_init()
+ *
+ * @param mem_desc_addr        Address of the free memory list
+ * @return
+ */
+int cvmx_bootmem_init(u64 mem_desc_addr);
+
+/**
+ * Allocate a block of memory from the free list that was passed
+ * to the application by the bootloader.
+ * This is an allocate-only algorithm, so freeing memory is not possible.
+ *
+ * @param size      Size in bytes of block to allocate
+ * @param alignment Alignment required - must be power of 2
+ *
+ * @return pointer to block of memory, NULL on error
+ */
+void *cvmx_bootmem_alloc(u64 size, u64 alignment);
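A minimal caller sketch, assuming the free list has already been set up via cvmx_bootmem_init():

    /* Allocate a 4 KiB scratch buffer aligned to a 4 KiB boundary. */
    void *scratch = cvmx_bootmem_alloc(4096, 4096);

    if (!scratch)
            printf("bootmem allocation failed\n");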
+
+/**
+ * Allocate a block of memory from the free list that was passed
+ * to the application by the bootloader from a specific node.
+ * This is an allocate-only algorithm, so freeing memory is not possible.
+ *
+ * @param node The node to allocate memory from
+ * @param size  Size in bytes of block to allocate
+ * @param alignment Alignment required - must be power of 2
+ *
+ * @return pointer to block of memory, NULL on error
+ */
+void *cvmx_bootmem_alloc_node(u64 node, u64 size, u64 alignment);
+
+/**
+ * Allocate a block of memory from the free list that was
+ * passed to the application by the bootloader at a specific
+ * address. This is an allocate-only algorithm, so
+ * freeing memory is not possible. Allocation will fail if
+ * memory cannot be allocated at the specified address.
+ *
+ * @param size      Size in bytes of block to allocate
+ * @param address   Physical address to allocate memory at.  If this
+ *                  memory is not available, the allocation fails.
+ * @param alignment Alignment required - must be power of 2
+ * @return pointer to block of memory, NULL on error
+ */
+void *cvmx_bootmem_alloc_address(u64 size, u64 address,
+                                u64 alignment);
+
+/**
+ * Allocate a block of memory from the free list that was
+ * passed to the application by the bootloader within a specified
+ * address range. This is an allocate-only algorithm, so
+ * freeing memory is not possible. Allocation will fail if
+ * memory cannot be allocated in the requested range.
+ *
+ * @param size      Size in bytes of block to allocate
+ * @param min_addr  defines the minimum address of the range
+ * @param max_addr  defines the maximum address of the range
+ * @param alignment Alignment required - must be power of 2
+ * @return pointer to block of memory, NULL on error
+ */
+void *cvmx_bootmem_alloc_range(u64 size, u64 alignment,
+                              u64 min_addr, u64 max_addr);
+
+/**
+ * Allocate a block of memory from the free list that was passed
+ * to the application by the bootloader, and assign it a name in the
+ * global named block table.  (part of the cvmx_bootmem_descriptor_t structure)
+ * Named blocks can later be freed.
+ *
+ * @param size  Size in bytes of block to allocate
+ * @param alignment Alignment required - must be power of 2
+ * @param name  name of block - must be less than CVMX_BOOTMEM_NAME_LEN bytes
+ *
+ * @return pointer to block of memory, NULL on error
+ */
+void *cvmx_bootmem_alloc_named(u64 size, u64 alignment,
+                              const char *name);
+
+/**
+ * Allocate a block of memory from the free list that was passed
+ * to the application by the bootloader, and assign it a name in the
+ * global named block table.  (part of the cvmx_bootmem_descriptor_t structure)
+ * Named blocks can later be freed.
+ *
+ * @param size Size in bytes of block to allocate
+ * @param alignment Alignment required - must be power of 2
+ * @param name name of block - must be less than CVMX_BOOTMEM_NAME_LEN bytes
+ * @param flags     Flags to control options for the allocation.
+ *
+ * @return pointer to block of memory, NULL on error
+ */
+void *cvmx_bootmem_alloc_named_flags(u64 size, u64 alignment,
+                                    const char *name, u32 flags);
+
+/**
+ * Allocate a block of memory from the free list that was passed
+ * to the application by the bootloader, and assign it a name in the
+ * global named block table.  (part of the cvmx_bootmem_descriptor_t structure)
+ * Named blocks can later be freed.
+ *
+ * @param size    Size in bytes of block to allocate
+ * @param address Physical address to allocate memory at.  If this
+ *                memory is not available, the allocation fails.
+ * @param name    name of block - must be less than CVMX_BOOTMEM_NAME_LEN bytes
+ *
+ * @return pointer to block of memory, NULL on error
+ */
+void *cvmx_bootmem_alloc_named_address(u64 size, u64 address,
+                                      const char *name);
+
+/**
+ * Allocate a block of memory from a specific range of the free list
+ * that was passed to the application by the bootloader, and assign it
+ * a name in the global named block table.  (part of the
+ * cvmx_bootmem_descriptor_t structure) Named blocks can later be
+ * freed.  If request cannot be satisfied within the address range
+ * specified, NULL is returned
+ *
+ * @param size      Size in bytes of block to allocate
+ * @param min_addr  minimum address of range
+ * @param max_addr  maximum address of range
+ * @param align  Alignment of memory to be allocated. (must be a power of 2)
+ * @param name   name of block - must be less than CVMX_BOOTMEM_NAME_LEN bytes
+ *
+ * @return pointer to block of memory, NULL on error
+ */
+void *cvmx_bootmem_alloc_named_range(u64 size, u64 min_addr,
+                                    u64 max_addr, u64 align,
+                                    const char *name);
+
+/**
+ * Allocate if needed a block of memory from a specific range of the
+ * free list that was passed to the application by the bootloader, and
+ * assign it a name in the global named block table.  (part of the
+ * cvmx_bootmem_descriptor_t structure) Named blocks can later be
+ * freed.  If the requested name block is already allocated, return
+ * the pointer to block of memory.  If request cannot be satisfied
+ * within the address range specified, NULL is returned
+ *
+ * @param size   Size in bytes of block to allocate
+ * @param min_addr  minimum address of range
+ * @param max_addr  maximum address of range
+ * @param align  Alignment of memory to be allocated. (must be a power of 2)
+ * @param name   name of block - must be less than CVMX_BOOTMEM_NAME_LEN bytes
+ * @param init   Initialization function
+ *
+ * The initialization function is optional, if omitted the named block
+ * is initialized to all zeros when it is created, i.e. once.
+ *
+ * @return pointer to block of memory, NULL on error
+ */
+void *cvmx_bootmem_alloc_named_range_once(u64 size,
+                                         u64 min_addr,
+                                         u64 max_addr,
+                                         u64 align,
+                                         const char *name,
+                                         void (*init)(void *));
+
+/**
+ * Allocate all free memory starting at the start address.  This is used to
+ * prevent any free blocks from later being allocated within the reserved space.
+ * Note that any memory allocated with this function cannot be later freed.
+ *
+ * @param start_addr  Starting address to reserve
+ * @param size        Size in bytes to reserve starting at start_addr
+ * @param name        Name to assign to reserved blocks
+ * @param flags       Flags to use when reserving memory
+ *
+ * @return 0 on failure,
+ *         !0 on success
+ */
+int cvmx_bootmem_reserve_memory(u64 start_addr, u64 size,
+                               const char *name, u32 flags);
+
+/**
+ * Frees a previously allocated named bootmem block.
+ *
+ * @param name   name of block to free
+ *
+ * @return 0 on failure,
+ *         !0 on success
+ */
+int cvmx_bootmem_free_named(const char *name);
+
+/**
+ * Finds a named bootmem block by name.
+ *
+ * @param name   name of block to free
+ *
+ * @return pointer to named block descriptor on success
+ *         0 on failure
+ */
+const struct cvmx_bootmem_named_block_desc *
+cvmx_bootmem_find_named_block(const char *name);
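Putting the named-block calls together (a hedged sketch; the block name and sizes are arbitrary example values):

    void *buf = cvmx_bootmem_alloc_named(1 << 20, 128, "app_scratch");

    if (buf) {
            const struct cvmx_bootmem_named_block_desc *desc;

            desc = cvmx_bootmem_find_named_block("app_scratch");
            /* desc->base_addr and desc->size describe the block just created */
            cvmx_bootmem_free_named("app_scratch");
    }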
+
+/**
+ * Returns the size of available memory in bytes, only
+ * counting blocks that are at least as big as the minimum block
+ * size.
+ *
+ * @param min_block_size
+ *               Minimum block size to count in total.
+ *
+ * @return Number of bytes available for allocation that meet the
+ * block size requirement
+ */
+u64 cvmx_bootmem_available_mem(u64 min_block_size);
+
+/**
+ * Prints out the list of named blocks that have been allocated
+ * along with their addresses and sizes.
+ * This is primarily used for debugging purposes
+ */
+void cvmx_bootmem_print_named(void);
+
+/**
+ * Allocates a block of physical memory from the free list, at
+ * (optional) requested address and alignment.
+ *
+ * @param req_size size of region to allocate.  All requests are
+ * rounded up to a multiple of CVMX_BOOTMEM_ALIGNMENT_SIZE bytes
+ *
+ * @param address_min Minimum address that block can occupy.
+ *
+ * @param address_max Specifies the maximum address (inclusive)
+ * that the allocation can use.
+ *
+ * @param alignment Requested alignment of the block.  If this
+ *                  alignment cannot be met, the allocation fails.
+ *                  This must be a power of 2.  (Note: Alignment of
+ *                  CVMX_BOOTMEM_ALIGNMENT_SIZE bytes is required, and
+ *                  internally enforced.  Requested alignments of less
+ *                  than CVMX_BOOTMEM_ALIGNMENT_SIZE are set to
+ *                  CVMX_BOOTMEM_ALIGNMENT_SIZE.)
+ * @param flags     Flags to control options for the allocation.
+ *
+ * @return physical address of block allocated, or -1 on failure
+ */
+s64 cvmx_bootmem_phy_alloc(u64 req_size, u64 address_min, u64 address_max,
+                          u64 alignment, u32 flags);
+
+/**
+ * Allocates a named block of physical memory from the free list, at
+ * (optional) requested address and alignment.
+ *
+ * @param size size of region to allocate.  All requests are rounded
+ * up to a multiple of CVMX_BOOTMEM_ALIGNMENT_SIZE bytes
+ *
+ * @param min_addr  Minimum address that block can occupy.
+ *
+ * @param max_addr Specifies the maximum address (inclusive) that
+ * the allocation can use.
+ *
+ * @param alignment Requested alignment of the block.  If this
+ *                  alignment cannot be met, the allocation fails.
+ *                  This must be a power of 2.  (Note: Alignment of
+ *                  CVMX_BOOTMEM_ALIGNMENT_SIZE bytes is required, and
+ *                  internally enforced.  Requested alignments of less
+ *                  than CVMX_BOOTMEM_ALIGNMENT_SIZE are set to
+ *                  CVMX_BOOTMEM_ALIGNMENT_SIZE.)
+ *
+ * @param name      name to assign to named block
+ *
+ * @param flags     Flags to control options for the allocation.
+ *
+ * @return physical address of block allocated, or -1 on failure
+ */
+s64 cvmx_bootmem_phy_named_block_alloc(u64 size, u64 min_addr, u64 max_addr,
+                                      u64 alignment, const char *name,
+                                      u32 flags);
+
+/**
+ * Finds a named memory block by name.
+ * Also used for finding an unused entry in the named block table.
+ *
+ * @param name Name of memory block to find.  If NULL pointer given,
+ *             then finds unused descriptor, if available.
+ *
+ * @param flags  Flags to control options for the allocation.
+ *
+ * @return Physical address of the memory block descriptor, zero if not
+ *         found. If zero returned when name parameter is NULL, then no
+ *         memory block descriptors are available.
+ */
+u64 cvmx_bootmem_phy_named_block_find(const char *name, u32 flags);
+
+/**
+ * Returns the size of available memory in bytes, only
+ * counting blocks that are at least as big as the minimum block
+ * size.
+ *
+ * @param min_block_size
+ *               Minimum block size to count in total.
+ *
+ * @return Number of bytes available for allocation that meet the
+ * block size requirement
+ */
+u64 cvmx_bootmem_phy_available_mem(u64 min_block_size);
+
+/**
+ * Frees a named block.
+ *
+ * @param name   name of block to free
+ * @param flags  flags for passing options
+ *
+ * @return 0 on failure
+ *         1 on success
+ */
+int cvmx_bootmem_phy_named_block_free(const char *name, u32 flags);
+
+/**
+ * Frees a block to the bootmem allocator list.  This must
+ * be used with care, as the size provided must match the size
+ * of the block that was allocated, or the list will become
+ * corrupted.
+ *
+ * IMPORTANT:  This is only intended to be used as part of named block
+ * frees and initial population of the free memory list.
+ *
+ * @param phy_addr physical address of block
+ * @param size     size of block in bytes.
+ * @param flags    flags for passing options
+ *
+ * @return 1 on success,
+ *         0 on failure
+ */
+int __cvmx_bootmem_phy_free(u64 phy_addr, u64 size, u32 flags);
+
+/**
+ * Prints the list of currently allocated named blocks
+ *
+ */
+void cvmx_bootmem_phy_named_block_print(void);
+
+/**
+ * Prints the list of available memory.
+ *
+ */
+void cvmx_bootmem_phy_list_print(void);
+
+/**
+ * This function initializes the free memory list used by cvmx_bootmem.
+ * This must be called before any allocations can be done.
+ *
+ * @param mem_size Total memory available, in bytes
+ *
+ * @param low_reserved_bytes Number of bytes to reserve (leave out of
+ * free list) at address 0x0.
+ *
+ * @param desc_buffer Buffer for the bootmem descriptor.  This must be
+ *                 a 32 bit addressable address.
+ *
+ * @return 1 on success
+ *         0 on failure
+ */
+s64 cvmx_bootmem_phy_mem_list_init(u64 mem_size, u32 low_reserved_bytes,
+                                  struct cvmx_bootmem_desc *desc_buffer);
+
+/**
+ * This function initializes the free memory list used by cvmx_bootmem.
+ * This must be called before any allocations can be done.
+ *
+ * @param nodemask Nodemask - one bit per node (bit0->node0, bit1->node1,...)
+ *
+ * @param mem_size[] Array of memory sizes in MBytes per node ([0]->node0,...)
+ *
+ * @param low_reserved_bytes Number of bytes to reserve (leave out of
+ * free list) at address 0x0.
+ *
+ * @param desc_buffer Buffer for the bootmem descriptor.  This must be
+ *                 a 32 bit addressable address.
+ *
+ * @return 1 on success
+ *         0 on failure
+ */
+s64 cvmx_bootmem_phy_mem_list_init_multi(u8 nodemask, u32 mem_size[],
+                                        u32 low_reserved_bytes,
+                                        struct cvmx_bootmem_desc *desc_buffer);
+/**
+ * Locks the bootmem allocator.  This is useful in certain situations
+ * where multiple allocations must be made without being interrupted.
+ * This should be used with the CVMX_BOOTMEM_FLAG_NO_LOCKING flag.
+ *
+ */
+void cvmx_bootmem_lock(void);
+
+/**
+ * Unlocks the bootmem allocator.  This is useful in certain situations
+ * where multiple allocations must be made without being interrupted.
+ * This should be used with the CVMX_BOOTMEM_FLAG_NO_LOCKING flag.
+ *
+ */
+void cvmx_bootmem_unlock(void);
+
+/**
+ * Internal use function to get the current descriptor pointer
+ */
+void *__cvmx_bootmem_internal_get_desc_ptr(void);
+
+/**
+ * Internal use.  This is used to get a pointer to a physical
+ * address.  For Linux n32 the physical address is mmapped to a virtual
+ * address and the virtual address is returned.  For n64 the address
+ * is converted to an xkphys address and the xkphys address is
+ * returned.
+ */
+void *__cvmx_phys_addr_to_ptr(u64 phys, int size);
+const struct cvmx_bootmem_named_block_desc *
+__cvmx_bootmem_find_named_block_flags(const char *name, u32 flags);
+void *cvmx_bootmem_alloc_named_range_flags(u64 size, u64 min_addr,
+                                          u64 max_addr, u64 align,
+                                          const char *name, u32 flags);
+u64 cvmx_bootmem_phy_alloc_range(u64 size, u64 alignment,
+                                u64 min_addr, u64 max_addr);
+
+#endif /*   __CVMX_BOOTMEM_H__ */
diff --git a/arch/mips/mach-octeon/include/mach/cvmx-coremask.h b/arch/mips/mach-octeon/include/mach/cvmx-coremask.h
new file mode 100644 (file)
index 0000000..c34ff46
--- /dev/null
@@ -0,0 +1,752 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+/**
+ * Module to support operations on bitmap of cores. Coremask can be used to
+ * select a specific core, a group of cores, or all available cores, for
+ * initialization and differentiation of roles within a single shared binary
+ * executable image.
+ *
+ * The core numbers used in this file are the same value as what is found in
+ * the COP0_EBASE register and the rdhwr 0 instruction.
+ *
+ * For the CN78XX and other multi-node environments the core numbers are not
+ * contiguous.  The core numbers for the CN78XX are as follows:
+ *
+ * Node 0:     Cores 0 - 47
+ * Node 1:     Cores 128 - 175
+ * Node 2:     Cores 256 - 303
+ * Node 3:     Cores 384 - 431
+ *
+ * The coremask environment generally tries to be node agnostic in order to
+ * provide future compatibility if more cores are added to future processors
+ * or more nodes are supported.
+ */
+
+#ifndef __CVMX_COREMASK_H__
+#define __CVMX_COREMASK_H__
+
+#include "cvmx-regs.h"
+
+/* bits per holder */
+#define CVMX_COREMASK_HLDRSZ   ((int)(sizeof(u64) * 8))
+
+/** Maximum allowed cores per node */
+#define CVMX_COREMASK_MAX_CORES_PER_NODE       (1 << CVMX_NODE_NO_SHIFT)
+
+/** Maximum number of bits actually used in the coremask */
+#define CVMX_MAX_USED_CORES_BMP        (1 << (CVMX_NODE_NO_SHIFT + CVMX_NODE_BITS))
+
+/* the number of valid bits in, and the mask of, the most significant holder */
+#define CVMX_COREMASK_MSHLDR_NBITS                     \
+       (CVMX_MIPS_MAX_CORES % CVMX_COREMASK_HLDRSZ)
+
+#define CVMX_COREMASK_MSHLDR_MASK                              \
+       ((CVMX_COREMASK_MSHLDR_NBITS) ?                         \
+        (((u64)1 << CVMX_COREMASK_MSHLDR_NBITS) - 1) :         \
+        ((u64)-1))
+
+/* cvmx_coremask size in u64 */
+#define CVMX_COREMASK_BMPSZ                                    \
+       ((int)(CVMX_MIPS_MAX_CORES / CVMX_COREMASK_HLDRSZ +     \
+              (CVMX_COREMASK_MSHLDR_NBITS != 0)))
+
+#define CVMX_COREMASK_USED_BMPSZ                               \
+       (CVMX_MAX_USED_CORES_BMP / CVMX_COREMASK_HLDRSZ)
+
+#define CVMX_COREMASK_BMP_NODE_CORE_IDX(node, core)                    \
+       ((((node) << CVMX_NODE_NO_SHIFT) + (core)) / CVMX_COREMASK_HLDRSZ)
+/**
+ * Maximum available coremask.
+ */
+#define CVMX_COREMASK_MAX                              \
+       { {                                             \
+                       0x0000FFFFFFFFFFFF, 0,          \
+                               0x0000FFFFFFFFFFFF, 0,  \
+                               0x0000FFFFFFFFFFFF, 0,  \
+                               0x0000FFFFFFFFFFFF, 0,  \
+                               0, 0,                   \
+                               0, 0,                   \
+                               0, 0,                   \
+                               0, 0} }
+
+/**
+ * Empty coremask
+ */
+#define CVMX_COREMASK_EMPTY                                    \
+       { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }
+
+struct cvmx_coremask {
+       u64 coremask_bitmap[CVMX_COREMASK_BMPSZ];
+};
+
+/**
+ * Macro to iterate through all available cores in a coremask
+ *
+ * @param core - core variable to use to iterate
+ * @param pcm - pointer to core mask
+ *
+ * Use this like a for statement
+ */
+#define cvmx_coremask_for_each_core(core, pcm)                 \
+       for ((core) = -1;                                       \
+            (core) = cvmx_coremask_next_core((core), pcm),     \
+                    (core) >= 0;)
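Usage sketch for the core iterator (relies only on helpers defined later in this header):

    struct cvmx_coremask cm = CVMX_COREMASK_EMPTY;
    int core, count = 0;

    cvmx_coremask_set_core(&cm, 0);
    cvmx_coremask_set_core(&cm, 3);
    cvmx_coremask_for_each_core(core, &cm)
            count++;        /* visits core 0 and core 3, so count == 2 */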
+
+/**
+ * Given a node and node mask, return the next available node.
+ *
+ * @param node         starting node number
+ * @param node_mask    node mask to use to find the next node
+ *
+ * @return next node number or -1 if no more nodes are available
+ */
+static inline int cvmx_coremask_next_node(int node, u8 node_mask)
+{
+       int next_offset;
+
+       next_offset = __builtin_ffs(node_mask >> (node + 1));
+       if (next_offset == 0)
+               return -1;
+       else
+               return node + next_offset;
+}
+
+/**
+ * Iterate through all nodes in a node mask
+ *
+ * @param node         node iterator variable
+ * @param node_mask    mask to use for iterating
+ *
+ * Use this like a for statement
+ */
+#define cvmx_coremask_for_each_node(node, node_mask)           \
+       for ((node) = __builtin_ffs(node_mask) - 1;             \
+            (node) >= 0 && (node) < CVMX_MAX_NODES;            \
+            (node) = cvmx_coremask_next_node(node, node_mask))
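And the node iterator, with a hypothetical two-node mask:

    u8 node_mask = 0x5;     /* nodes 0 and 2 populated */
    int node;

    cvmx_coremask_for_each_node(node, node_mask)
            printf("node %d present\n", node);      /* prints 0, then 2 */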
+
+/**
+ * Is ``core'' set in the coremask?
+ *
+ * @param pcm is the pointer to the coremask.
+ * @param core
+ * @return 1 if core is set and 0 if not.
+ */
+static inline int cvmx_coremask_is_core_set(const struct cvmx_coremask *pcm,
+                                           int core)
+{
+       int n, i;
+
+       n = core % CVMX_COREMASK_HLDRSZ;
+       i = core / CVMX_COREMASK_HLDRSZ;
+
+       return (pcm->coremask_bitmap[i] & ((u64)1 << n)) != 0;
+}
+
+/**
+ * Is ``current core'' set in the coremask?
+ *
+ * @param pcm is the pointer to the coremask.
+ * @return 1 if core is set and 0 if not.
+ */
+static inline int cvmx_coremask_is_self_set(const struct cvmx_coremask *pcm)
+{
+       return cvmx_coremask_is_core_set(pcm, (int)cvmx_get_core_num());
+}
+
+/**
+ * Is coremask empty?
+ * @param pcm is the pointer to the coremask.
+ * @return 1 if *pcm is empty (all zeros), 0 if not empty.
+ */
+static inline int cvmx_coremask_is_empty(const struct cvmx_coremask *pcm)
+{
+       int i;
+
+       for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++)
+               if (pcm->coremask_bitmap[i] != 0)
+                       return 0;
+
+       return 1;
+}
+
+/**
+ * Set ``core'' in the coremask.
+ *
+ * @param pcm is the pointer to the coremask.
+ * @param core
+ * @return 0.
+ */
+static inline int cvmx_coremask_set_core(struct cvmx_coremask *pcm, int core)
+{
+       int n, i;
+
+       n = core % CVMX_COREMASK_HLDRSZ;
+       i = core / CVMX_COREMASK_HLDRSZ;
+       pcm->coremask_bitmap[i] |= ((u64)1 << n);
+
+       return 0;
+}
+
+/**
+ * Set ``current core'' in the coremask.
+ *
+ * @param pcm is the pointer to the coremask.
+ * @return 0.
+ */
+static inline int cvmx_coremask_set_self(struct cvmx_coremask *pcm)
+{
+       return cvmx_coremask_set_core(pcm, (int)cvmx_get_core_num());
+}
+
+/**
+ * Clear ``core'' from the coremask.
+ *
+ * @param pcm is the pointer to the coremask.
+ * @param core
+ * @return 0.
+ */
+static inline int cvmx_coremask_clear_core(struct cvmx_coremask *pcm, int core)
+{
+       int n, i;
+
+       n = core % CVMX_COREMASK_HLDRSZ;
+       i = core / CVMX_COREMASK_HLDRSZ;
+       pcm->coremask_bitmap[i] &= ~((u64)1 << n);
+
+       return 0;
+}
+
+/**
+ * Clear ``current core'' from the coremask.
+ *
+ * @param pcm is the pointer to the coremask.
+ * @return 0.
+ */
+static inline int cvmx_coremask_clear_self(struct cvmx_coremask *pcm)
+{
+       return cvmx_coremask_clear_core(pcm, cvmx_get_core_num());
+}
+
+/**
+ * Toggle ``core'' in the coremask.
+ *
+ * @param pcm is the pointer to the coremask.
+ * @param core
+ * @return 0.
+ */
+static inline int cvmx_coremask_toggle_core(struct cvmx_coremask *pcm, int core)
+{
+       int n, i;
+
+       n = core % CVMX_COREMASK_HLDRSZ;
+       i = core / CVMX_COREMASK_HLDRSZ;
+       pcm->coremask_bitmap[i] ^= ((u64)1 << n);
+
+       return 0;
+}
+
+/**
+ * Toggle ``current core'' in the coremask.
+ *
+ * @param pcm is the pointer to the coremask.
+ * @return 0.
+ */
+static inline int cvmx_coremask_toggle_self(struct cvmx_coremask *pcm)
+{
+       return cvmx_coremask_toggle_core(pcm, cvmx_get_core_num());
+}
+
+/**
+ * Set the lower 64-bit of the coremask.
+ * @param pcm  pointer to coremask
+ * @param coremask_64  64-bit coremask to apply to the first node (0)
+ */
+static inline void cvmx_coremask_set64(struct cvmx_coremask *pcm,
+                                      u64 coremask_64)
+{
+       pcm->coremask_bitmap[0] = coremask_64;
+}
+
+/**
+ * Set the 64-bit of the coremask for a particular node.
+ * @param pcm  pointer to coremask
+ * @param node node to set
+ * @param coremask_64  64-bit coremask to apply to the specified node
+ */
+static inline void cvmx_coremask_set64_node(struct cvmx_coremask *pcm,
+                                           u8 node,
+                                           u64 coremask_64)
+{
+       pcm->coremask_bitmap[CVMX_COREMASK_BMP_NODE_CORE_IDX(node, 0)] =
+               coremask_64;
+}
+
+/**
+ * Gets the lower 64-bits of the coremask
+ *
+ * @param[in] pcm - pointer to coremask
+ * @return 64-bit coremask for the first node
+ */
+static inline u64 cvmx_coremask_get64(const struct cvmx_coremask *pcm)
+{
+       return pcm->coremask_bitmap[0];
+}
+
+/**
+ * Gets the lower 64-bits of the coremask for the specified node
+ *
+ * @param[in] pcm - pointer to coremask
+ * @param node - node to get coremask for
+ * @return 64-bit coremask for the specified node
+ */
+static inline u64 cvmx_coremask_get64_node(const struct cvmx_coremask *pcm,
+                                          u8 node)
+{
+       return pcm->coremask_bitmap[CVMX_COREMASK_BMP_NODE_CORE_IDX(node, 0)];
+}
+
+/**
+ * Gets the lower 32-bits of the coremask for compatibility
+ *
+ * @param[in] pcm - pointer to coremask
+ * @return 32-bit coremask for the first node
+ * @deprecated This function is to maintain compatibility with older
+ *             SDK applications and may disappear at some point.
+ * This function is not compatible with the CN78XX or any other
+ * Octeon device with more than 32 cores.
+ */
+static inline u32 cvmx_coremask_get32(const struct cvmx_coremask *pcm)
+{
+       return pcm->coremask_bitmap[0] & 0xffffffff;
+}
+
+/*
+ * cvmx_coremask_cmp() returns an integer less than, equal to, or
+ * greater than zero if *pcm1 is found, respectively, to be less than,
+ * to match, or be greater than *pcm2.
+ */
+static inline int cvmx_coremask_cmp(const struct cvmx_coremask *pcm1,
+                                   const struct cvmx_coremask *pcm2)
+{
+       int i;
+
+       /* Start from highest node for arithmetically correct result */
+       for (i = CVMX_COREMASK_USED_BMPSZ - 1; i >= 0; i--)
+               if (pcm1->coremask_bitmap[i] != pcm2->coremask_bitmap[i]) {
+                       return (pcm1->coremask_bitmap[i] >
+                               pcm2->coremask_bitmap[i]) ? 1 : -1;
+               }
+
+       return 0;
+}
+
+/*
+ * cvmx_coremask_OPx(pcm1, pcm2[, pcm3]), where OPx can be
+ * - and
+ * - or
+ * - xor
+ * - not
+ * ...
+ * For binary operators, pcm1 <-- pcm2 OPx pcm3.
+ * For unaries, pcm1 <-- OPx pcm2.
+ */
+#define CVMX_COREMASK_BINARY_DEFUN(binary_op, op)              \
+       static inline int cvmx_coremask_##binary_op(            \
+               struct cvmx_coremask *pcm1,                             \
+               const struct cvmx_coremask *pcm2,                       \
+               const struct cvmx_coremask *pcm3)                       \
+       {                                                       \
+               int i;                                          \
+                                                               \
+               for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++)  \
+                       pcm1->coremask_bitmap[i] =              \
+                               pcm2->coremask_bitmap[i]        \
+                               op                              \
+                               pcm3->coremask_bitmap[i];       \
+                                                               \
+               return 0;                                       \
+       }
+
+#define CVMX_COREMASK_UNARY_DEFUN(unary_op, op)                        \
+       static inline int cvmx_coremask_##unary_op(             \
+               struct cvmx_coremask *pcm1,                             \
+               const struct cvmx_coremask *pcm2)                       \
+       {                                                       \
+               int i;                                          \
+                                                               \
+               for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++)  \
+                       pcm1->coremask_bitmap[i] =              \
+                               op                              \
+                               pcm2->coremask_bitmap[i];       \
+                                                               \
+               return 0;                                       \
+       }
+
+/* cvmx_coremask_and(pcm1, pcm2, pcm3): pcm1 = pcm2 & pcm3 */
+CVMX_COREMASK_BINARY_DEFUN(and, &)
+/* cvmx_coremask_or(pcm1, pcm2, pcm3): pcm1 = pcm2 | pcm3  */
+CVMX_COREMASK_BINARY_DEFUN(or, |)
+/* cvmx_coremask_xor(pcm1, pcm2, pcm3): pcm1 = pcm2 ^ pcm3 */
+CVMX_COREMASK_BINARY_DEFUN(xor, ^)
+/* cvmx_coremask_maskoff(pcm1, pcm2, pcm3): pcm1 = pcm2 & ~pcm3 */
+CVMX_COREMASK_BINARY_DEFUN(maskoff, & ~)
+/* cvmx_coremask_not(pcm1, pcm2): pcm1 = ~pcm2       */
+CVMX_COREMASK_UNARY_DEFUN(not, ~)
+/* cvmx_coremask_fill(pcm1, pcm2): pcm1 = -1      */
+CVMX_COREMASK_UNARY_DEFUN(fill, -1 |)
+/* cvmx_coremask_clear(pcm1, pcm2): pcm1 = 0     */
+CVMX_COREMASK_UNARY_DEFUN(clear, 0 &)
+/* cvmx_coremask_dup(pcm1, pcm2): pcm1 = pcm2       */
+CVMX_COREMASK_UNARY_DEFUN(dup, +)
+
+/*
+ * Macros using the unary functions defined w/
+ * CVMX_COREMASK_UNARY_DEFUN
+ * - set *pcm to its complement
+ * - set all bits in *pcm to 0
+ * - set all (valid) bits in *pcm to 1
+ */
+#define cvmx_coremask_complement(pcm)  cvmx_coremask_not(pcm, pcm)
+/* On clear, even clear the unused bits */
+#define cvmx_coremask_clear_all(pcm)                                   \
+       *(pcm) = (struct cvmx_coremask)CVMX_COREMASK_EMPTY
+#define cvmx_coremask_set_all(pcm)     cvmx_coremask_fill(pcm, NULL)
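A short sketch of the generated set operations, on two hypothetical masks:

    struct cvmx_coremask a, b, res;

    cvmx_coremask_clear_all(&a);
    cvmx_coremask_clear_all(&b);
    cvmx_coremask_set_core(&a, 0);
    cvmx_coremask_set_core(&a, 1);
    cvmx_coremask_set_core(&b, 1);

    cvmx_coremask_and(&res, &a, &b);        /* res = a & b: only core 1 */
    cvmx_coremask_or(&res, &a, &b);         /* res = a | b: cores 0 and 1 */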
+
+/*
+ * convert a string of hex digits to struct cvmx_coremask
+ *
+ * @param pcm
+ * @param hexstr can be
+ *     - "[1-9A-Fa-f][0-9A-Fa-f]*", or
+ *     - "-1" to set the bits for all the cores.
+ * return
+ *      0 for success,
+ *     -1 for string too long (i.e., hexstr takes more bits than
+ *        CVMX_MIPS_MAX_CORES),
+ *     -2 for conversion problems from hex string to an unsigned
+ *        long long, e.g., non-hex char in hexstr, and
+ *     -3 for hexstr starting with '0'.
+ * NOTE:
+ *     This function clears the bitmask in *pcm before the conversion.
+ */
+int cvmx_coremask_str2bmp(struct cvmx_coremask *pcm, char *hexstr);
+
+/*
+ * convert a struct cvmx_coremask to a string of hex digits
+ *
+ * @param pcm
+ * @param hexstr is "[1-9A-Fa-f][0-9A-Fa-f]*"
+ *
+ * return 0.
+ */
+int cvmx_coremask_bmp2str(const struct cvmx_coremask *pcm, char *hexstr);
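For example (a sketch; the buffer sizing is an assumption derived from CVMX_MIPS_MAX_CORES):

    struct cvmx_coremask cm;
    char hexstr[] = "ff";                           /* cores 0-7 on node 0 */
    char buf[CVMX_MIPS_MAX_CORES / 4 + 1];

    if (cvmx_coremask_str2bmp(&cm, hexstr) == 0)
            cvmx_coremask_bmp2str(&cm, buf);        /* round-trips the mask */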
+
+/*
+ * Returns the index of the lowest bit in a coremask holder.
+ */
+static inline int cvmx_coremask_lowest_bit(u64 h)
+{
+       return __builtin_ctzll(h);
+}
+
+/*
+ * Returns the 0-based index of the highest bit in a coremask holder.
+ */
+static inline int cvmx_coremask_highest_bit(u64 h)
+{
+       return (64 - __builtin_clzll(h) - 1);
+}
+
+/**
+ * Returns the last core within the coremask and -1 when the coremask
+ * is empty.
+ *
+ * @param[in] pcm - pointer to coremask
+ * @returns last core set in the coremask or -1 if all clear
+ *
+ */
+static inline int cvmx_coremask_get_last_core(const struct cvmx_coremask *pcm)
+{
+       int i;
+       int found = -1;
+
+       for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++) {
+               if (pcm->coremask_bitmap[i])
+                       found = i;
+       }
+
+       if (found == -1)
+               return -1;
+
+       return found * CVMX_COREMASK_HLDRSZ +
+               cvmx_coremask_highest_bit(pcm->coremask_bitmap[found]);
+}
+
+/**
+ * Returns the first core within the coremask and -1 when the coremask
+ * is empty.
+ *
+ * @param[in] pcm - pointer to coremask
+ * @returns first core set in the coremask or -1 if all clear
+ *
+ */
+static inline int cvmx_coremask_get_first_core(const struct cvmx_coremask *pcm)
+{
+       int i;
+
+       for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++)
+               if (pcm->coremask_bitmap[i])
+                       break;
+
+       if (i == CVMX_COREMASK_USED_BMPSZ)
+               return -1;
+
+       return i * CVMX_COREMASK_HLDRSZ +
+               cvmx_coremask_lowest_bit(pcm->coremask_bitmap[i]);
+}
+
+/**
+ * Given a core and coremask, return the next available core in the coremask
+ * or -1 if none are available.
+ *
+ * @param core - starting core to check (can be -1 for core 0)
+ * @param pcm - pointer to coremask to check for the next core.
+ *
+ * @return next core following the core parameter or -1 if no more cores.
+ */
+static inline int cvmx_coremask_next_core(int core,
+                                         const struct cvmx_coremask *pcm)
+{
+       int n, i;
+
+       core++;
+       n = core % CVMX_COREMASK_HLDRSZ;
+       i = core / CVMX_COREMASK_HLDRSZ;
+
+       if (pcm->coremask_bitmap[i] != 0) {
+               for (; n < CVMX_COREMASK_HLDRSZ; n++)
+                       if (pcm->coremask_bitmap[i] & (1ULL << n))
+                               return ((i * CVMX_COREMASK_HLDRSZ) + n);
+       }
+
+       for (i = i + 1; i < CVMX_COREMASK_USED_BMPSZ; i++) {
+               if (pcm->coremask_bitmap[i] != 0)
+                       return (i * CVMX_COREMASK_HLDRSZ) +
+                               cvmx_coremask_lowest_bit(pcm->coremask_bitmap[i]);
+       }
+       return -1;
+}
+
+/**
+ * Compute coremask for count cores starting with start_core.
+ * Note that the coremask for multi-node processors may have
+ * gaps.
+ *
+ * @param[out]  pcm        pointer to core mask data structure
+ * @param      start_core starting core number
+ * @param       count      number of cores
+ *
+ */
+static inline void cvmx_coremask_set_cores(struct cvmx_coremask *pcm,
+                                          unsigned int start_core,
+                                          unsigned int count)
+{
+       int node;
+       int core;       /** Current core in node */
+       int cores_in_node;
+       int i;
+
+       assert(CVMX_MAX_CORES < CVMX_COREMASK_HLDRSZ);
+       node = start_core >> CVMX_NODE_NO_SHIFT;
+       core = start_core & ((1 << CVMX_NODE_NO_SHIFT) - 1);
+       assert(core < CVMX_MAX_CORES);
+
+       cvmx_coremask_clear_all(pcm);
+       while (count > 0) {
+               if (count + core > CVMX_MAX_CORES)
+                       cores_in_node = CVMX_MAX_CORES - core;
+               else
+                       cores_in_node = count;
+
+               i = CVMX_COREMASK_BMP_NODE_CORE_IDX(node, core);
+               pcm->coremask_bitmap[i] = ((1ULL << cores_in_node) - 1) << core;
+               count -= cores_in_node;
+               core = 0;
+               node++;
+       }
+}
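For instance, selecting four cores starting at global core 0 (a sketch; no prior initialization is needed since the function clears the mask first):

    struct cvmx_coremask cm;

    cvmx_coremask_set_cores(&cm, 0, 4);
    /* cvmx_coremask_get64(&cm) now reads back 0xf (cores 0-3 on node 0) */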
+
+/**
+ * Makes a copy of a coremask
+ *
+ * @param[out] dest - pointer to destination coremask
+ * @param[in]  src  - pointer to source coremask
+ */
+static inline void cvmx_coremask_copy(struct cvmx_coremask *dest,
+                                     const struct cvmx_coremask *src)
+{
+       memcpy(dest, src, sizeof(*dest));
+}
+
+/**
+ * Test to see if the specified core is first core in coremask.
+ *
+ * @param[in]  pcm  pointer to the coremask to test against
+ * @param[in]  core core to check
+ *
+ * @return  1 if the core is first core in the coremask, 0 otherwise
+ *
+ */
+static inline int cvmx_coremask_is_core_first_core(const struct cvmx_coremask *pcm,
+                                                  unsigned int core)
+{
+       int n, i;
+
+       n = core / CVMX_COREMASK_HLDRSZ;
+
+       for (i = 0; i < n; i++)
+               if (pcm->coremask_bitmap[i] != 0)
+                       return 0;
+
+       /* From now on we only care about the core number within an entry */
+       core &= (CVMX_COREMASK_HLDRSZ - 1);
+       if (__builtin_ffsll(pcm->coremask_bitmap[n]) < (core + 1))
+               return 0;
+
+       return (__builtin_ffsll(pcm->coremask_bitmap[n]) == core + 1);
+}
+
+/*
+ * NOTE:
+ * cvmx_coremask_is_first_core() was retired due to improper usage.
+ * For inquiring about the current core being the initializing
+ * core for an application, use cvmx_is_init_core().
+ * For simply inquiring if the current core is numerically
+ * lowest in a given mask, use:
+ *     cvmx_coremask_is_core_first_core(pcm, cvmx_get_core_num())
+ */
+
+/**
+ * Returns the number of 1 bits set in a coremask
+ *
+ * @param[in] pcm - pointer to core mask
+ *
+ * @return number of bits set in the coremask
+ */
+static inline int cvmx_coremask_get_core_count(const struct cvmx_coremask *pcm)
+{
+       int i;
+       int count = 0;
+
+       for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++)
+               count += __builtin_popcountll(pcm->coremask_bitmap[i]);
+
+       return count;
+}
+
+/**
+ * For multi-node systems, return the node a core belongs to.
+ *
+ * @param core - core number (0-1023)
+ *
+ * @return node number core belongs to
+ */
+static inline int cvmx_coremask_core_to_node(int core)
+{
+       return (core >> CVMX_NODE_NO_SHIFT) & CVMX_NODE_MASK;
+}
+
+/**
+ * Given a core number on a multi-node system, return the core number for a
+ * particular node.
+ *
+ * @param core - global core number
+ *
+ * @returns core number local to the node.
+ */
+static inline int cvmx_coremask_core_on_node(int core)
+{
+       return (core & ((1 << CVMX_NODE_NO_SHIFT) - 1));
+}
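With CVMX_NODE_NO_SHIFT == 7 (see cvmx-regs.h), these helpers split a global core number as in the CN78XX layout described at the top of this file; for example:

    int node  = cvmx_coremask_core_to_node(130);    /* 130 >> 7 = 1 */
    int local = cvmx_coremask_core_on_node(130);    /* 130 & 127 = 2 */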
+
+/**
+ * Returns if one coremask is a subset of another coremask
+ *
+ * @param main - main coremask to test
+ * @param subset - subset coremask to test
+ *
+ * @return 0 if the subset contains cores not in the main coremask or 1 if
+ *         the subset is fully contained in the main coremask.
+ */
+static inline int cvmx_coremask_is_subset(const struct cvmx_coremask *main,
+                                         const struct cvmx_coremask *subset)
+{
+       int i;
+
+       for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++)
+               if ((main->coremask_bitmap[i] & subset->coremask_bitmap[i]) !=
+                   subset->coremask_bitmap[i])
+                       return 0;
+       return 1;
+}
+
+/**
+ * Returns if one coremask intersects another coremask
+ *
+ * @param c1 - first coremask to test
+ * @param c2 - second coremask to test
+ *
+ * @return 1 if coremask c1 intersects coremask c2, 0 if they are exclusive
+ */
+static inline int cvmx_coremask_intersects(const struct cvmx_coremask *c1,
+                                          const struct cvmx_coremask *c2)
+{
+       int i;
+
+       for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++)
+               if ((c1->coremask_bitmap[i] & c2->coremask_bitmap[i]) != 0)
+                       return 1;
+       return 0;
+}
+
+/**
+ * Masks a single node of a coremask
+ *
+ * @param pcm - coremask to mask [inout]
+ * @param node       - node number to mask against
+ */
+static inline void cvmx_coremask_mask_node(struct cvmx_coremask *pcm, int node)
+{
+       int i;
+
+       for (i = 0; i < CVMX_COREMASK_BMP_NODE_CORE_IDX(node, 0); i++)
+               pcm->coremask_bitmap[i] = 0;
+
+       for (i = CVMX_COREMASK_BMP_NODE_CORE_IDX(node + 1, 0);
+            i < CVMX_COREMASK_USED_BMPSZ; i++)
+               pcm->coremask_bitmap[i] = 0;
+}
+
+/**
+ * Prints out a coremask in the form of node X: 0x... 0x...
+ *
+ * @param[in] pcm - pointer to core mask
+ *
+ * @return nothing
+ */
+void cvmx_coremask_print(const struct cvmx_coremask *pcm);
+
+static inline void cvmx_coremask_dprint(const struct cvmx_coremask *pcm)
+{
+       if (IS_ENABLED(DEBUG))
+               cvmx_coremask_print(pcm);
+}
+
+struct cvmx_coremask *octeon_get_available_coremask(struct cvmx_coremask *pcm);
+
+int validate_coremask(struct cvmx_coremask *pcm);
+
+#endif /* __CVMX_COREMASK_H__ */
diff --git a/arch/mips/mach-octeon/include/mach/cvmx-fuse.h b/arch/mips/mach-octeon/include/mach/cvmx-fuse.h
new file mode 100644 (file)
index 0000000..a06a132
--- /dev/null
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#ifndef __CVMX_FUSE_H__
+#define __CVMX_FUSE_H__
+
+/**
+ * Read a byte of fuse data
+ * @param node         node to read from
+ * @param byte_addr    address to read
+ *
+ * @return byte of fuse data
+ */
+static inline u8 cvmx_fuse_read_byte_node(u8 node, int byte_addr)
+{
+       u64 val;
+
+       val = FIELD_PREP(MIO_FUS_RCMD_ADDR, byte_addr) | MIO_FUS_RCMD_PEND;
+       csr_wr_node(node, CVMX_MIO_FUS_RCMD, val);
+
+       do {
+               val = csr_rd_node(node, CVMX_MIO_FUS_RCMD);
+       } while (val & MIO_FUS_RCMD_PEND);
+
+       return FIELD_GET(MIO_FUS_RCMD_DAT, val);
+}
+
+/**
+ * Read a byte of fuse data
+ * @param byte_addr   address to read
+ *
+ * @return byte of fuse data
+ */
+static inline u8 cvmx_fuse_read_byte(int byte_addr)
+{
+       return cvmx_fuse_read_byte_node(0, byte_addr);
+}
+
+/**
+ * Read a single fuse bit
+ *
+ * @param node   Node number
+ * @param fuse   Fuse number (0-1024)
+ *
+ * @return fuse value: 0 or 1
+ */
+static inline int cvmx_fuse_read_node(u8 node, int fuse)
+{
+       return (cvmx_fuse_read_byte_node(node, fuse >> 3) >> (fuse & 0x7)) & 1;
+}
+
+/**
+ * Read a single fuse bit
+ *
+ * @param fuse   Fuse number (0-1024)
+ *
+ * @return fuse value: 0 or 1
+ */
+static inline int cvmx_fuse_read(int fuse)
+{
+       return cvmx_fuse_read_node(0, fuse);
+}
+
+static inline int cvmx_octeon_fuse_locked(void)
+{
+       return cvmx_fuse_read(123);
+}
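Callers typically use the node-0 wrappers; a sketch (fuse number 200 is an arbitrary example value, only bit 123 above carries a documented meaning here):

    int locked = cvmx_octeon_fuse_locked();         /* reads fuse bit 123 */
    int bit = cvmx_fuse_read(200);                  /* arbitrary fuse bit on node 0 */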
+
+#endif /* __CVMX_FUSE_H__ */
diff --git a/arch/mips/mach-octeon/include/mach/cvmx-regs.h b/arch/mips/mach-octeon/include/mach/cvmx-regs.h
new file mode 100644 (file)
index 0000000..b84fc9f
--- /dev/null
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2020 Stefan Roese <sr@denx.de>
+ */
+
+#ifndef __CVMX_REGS_H__
+#define __CVMX_REGS_H__
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/io.h>
+
+/* General defines */
+#define CVMX_MAX_CORES         48
+/* Maximum # of bits to define core in node */
+#define CVMX_NODE_NO_SHIFT     7
+#define CVMX_NODE_BITS         2       /* Number of bits to define a node */
+#define CVMX_MAX_NODES         (1 << CVMX_NODE_BITS)
+#define CVMX_NODE_MASK         (CVMX_MAX_NODES - 1)
+#define CVMX_NODE_IO_SHIFT     36
+#define CVMX_NODE_MEM_SHIFT    40
+#define CVMX_NODE_IO_MASK      ((u64)CVMX_NODE_MASK << CVMX_NODE_IO_SHIFT)
+
+#define CVMX_MIPS_MAX_CORE_BITS        10      /* Maximum # of bits to define cores */
+#define CVMX_MIPS_MAX_CORES    (1 << CVMX_MIPS_MAX_CORE_BITS)
+
+#define MAX_CORE_TADS          8
+
+#define CAST_ULL(v)            ((unsigned long long)(v))
+#define CASTPTR(type, v)       ((type *)(long)(v))
+
+/* Regs */
+#define CVMX_CIU_PP_RST                0x0001010000000100ULL
+#define CVMX_CIU3_NMI          0x0001010000000160ULL
+#define CVMX_CIU_FUSE          0x00010100000001a0ULL
+#define CVMX_CIU_NMI           0x0001070000000718ULL
+
+#define CVMX_MIO_BOOT_LOC_CFGX(x) (0x0001180000000080ULL + ((x) & 1) * 8)
+#define MIO_BOOT_LOC_CFG_BASE          GENMASK_ULL(27, 3)
+#define MIO_BOOT_LOC_CFG_EN            BIT_ULL(31)
+
+#define CVMX_MIO_BOOT_LOC_ADR  0x0001180000000090ULL
+#define MIO_BOOT_LOC_ADR_ADR           GENMASK_ULL(7, 3)
+
+#define CVMX_MIO_BOOT_LOC_DAT  0x0001180000000098ULL
+
+#define CVMX_MIO_FUS_DAT2      0x0001180000001410ULL
+#define MIO_FUS_DAT2_NOCRYPTO          BIT_ULL(26)
+#define MIO_FUS_DAT2_NOMUL             BIT_ULL(27)
+#define MIO_FUS_DAT2_DORM_CRYPTO       BIT_ULL(34)
+
+#define CVMX_MIO_FUS_RCMD      0x0001180000001500ULL
+#define MIO_FUS_RCMD_ADDR              GENMASK_ULL(7, 0)
+#define MIO_FUS_RCMD_PEND              BIT_ULL(12)
+#define MIO_FUS_RCMD_DAT               GENMASK_ULL(23, 16)
+
+#define CVMX_RNM_CTL_STATUS    0x0001180040000000ULL
+#define RNM_CTL_STATUS_EER_VAL         BIT_ULL(9)
+
+/* turn the variable name into a string */
+#define CVMX_TMP_STR(x)                CVMX_TMP_STR2(x)
+#define CVMX_TMP_STR2(x)       #x
+
+#define CVMX_RDHWRNV(result, regstr)                                   \
+       asm volatile ("rdhwr %[rt],$" CVMX_TMP_STR(regstr) : [rt] "=d" (result))
+
+#define CVMX_SYNCW                                     \
+       asm volatile ("syncw\nsyncw\n" : : : "memory")
+
+/* ToDo: Currently only node = 0 supported */
+static inline u64 csr_rd_node(int node, u64 addr)
+{
+       void __iomem *base;
+
+       base = ioremap_nocache(addr, 0x100);
+       return ioread64(base);
+}
+
+static inline u64 csr_rd(u64 addr)
+{
+       return csr_rd_node(0, addr);
+}
+
+static inline void csr_wr_node(int node, u64 addr, u64 val)
+{
+       void __iomem *base;
+
+       base = ioremap_nocache(addr, 0x100);
+       iowrite64(val, base);
+}
+
+static inline void csr_wr(u64 addr, u64 val)
+{
+       csr_wr_node(0, addr, val);
+}
+
+/*
+ * We need to use the volatile access here, otherwise the IO accessor
+ * functions might swap the bytes
+ */
+static inline u64 cvmx_read64_uint64(u64 addr)
+{
+       return *(volatile u64 *)addr;
+}
+
+static inline void cvmx_write64_uint64(u64 addr, u64 val)
+{
+       *(volatile u64 *)addr = val;
+}
+
+static inline u32 cvmx_read64_uint32(u64 addr)
+{
+       return *(volatile u32 *)addr;
+}
+
+static inline void cvmx_write64_uint32(u64 addr, u32 val)
+{
+       *(volatile u32 *)addr = val;
+}
+
+static inline void *cvmx_phys_to_ptr(u64 addr)
+{
+       return (void *)CKSEG0ADDR(addr);
+}
+
+static inline u64 cvmx_ptr_to_phys(void *ptr)
+{
+       return virt_to_phys(ptr);
+}
+
+/**
+ * Returns the number of the core on which the program is currently running.
+ *
+ * @return core number
+ */
+static inline unsigned int cvmx_get_core_num(void)
+{
+       unsigned int core_num;
+
+       CVMX_RDHWRNV(core_num, 0);
+       return core_num;
+}
+
+#endif /* __CVMX_REGS_H__ */
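For illustration (editorial, not part of the patch), the csr_rd()/csr_wr() accessors are typically used for read-modify-write and polling of the CSRs defined above, exactly as cvmx_fuse_read_byte_node() does in cvmx-fuse.h:

	u64 val;

	/* program the fuse byte address and kick off the read */
	val = FIELD_PREP(MIO_FUS_RCMD_ADDR, 0) | MIO_FUS_RCMD_PEND;
	csr_wr(CVMX_MIO_FUS_RCMD, val);

	/* poll until the hardware clears the pending bit */
	do {
		val = csr_rd(CVMX_MIO_FUS_RCMD);
	} while (val & MIO_FUS_RCMD_PEND);

	printf("fuse byte 0 = 0x%llx\n", FIELD_GET(MIO_FUS_RCMD_DAT, val));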
diff --git a/arch/mips/mach-octeon/include/mach/cvmx/cvmx-lmcx-defs.h b/arch/mips/mach-octeon/include/mach/cvmx/cvmx-lmcx-defs.h
new file mode 100644 (file)
index 0000000..3b4cba9
--- /dev/null
@@ -0,0 +1,4574 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#ifndef __CVMX_LMCX_DEFS_H__
+#define __CVMX_LMCX_DEFS_H__
+
+#define CVMX_LMCX_BANK_CONFLICT1(offs)                 \
+       ((0x000360ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_BANK_CONFLICT2(offs)                 \
+       ((0x000368ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_BIST_RESULT(offs)                    \
+       ((0x0000F8ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_CHAR_CTL(offs)                       \
+       ((0x000220ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_CHAR_DQ_ERR_COUNT(offs)              \
+       ((0x000040ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_CHAR_MASK0(offs)                     \
+       ((0x000228ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_CHAR_MASK1(offs)                     \
+       ((0x000230ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_CHAR_MASK2(offs)                     \
+       ((0x000238ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_CHAR_MASK3(offs)                     \
+       ((0x000240ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_CHAR_MASK4(offs)                     \
+       ((0x000318ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_COMP_CTL(offs)                       \
+       ((0x000028ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_COMP_CTL2(offs)                      \
+       ((0x0001B8ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_CONFIG(offs)                         \
+       ((0x000188ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_CONTROL(offs)                                \
+       ((0x000190ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_CTL(offs)                            \
+       ((0x000010ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_CTL1(offs)                           \
+       ((0x000090ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_DBTRAIN_CTL(offs)                    \
+       ((0x0003F8ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_DCLK_CNT(offs)                       \
+       ((0x0001E0ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_DCLK_CNT_HI(offs)                    \
+       ((0x000070ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_DCLK_CNT_LO(offs)                    \
+       ((0x000068ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_DCLK_CTL(offs)                       \
+       ((0x0000B8ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_DDR2_CTL(offs)                       \
+       ((0x000018ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_DDR4_DIMM_CTL(offs)                  \
+       ((0x0003F0ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_DDR_PLL_CTL(offs)                    \
+       ((0x000258ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_DELAY_CFG(offs)                      \
+       ((0x000088ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_DIMMX_DDR4_PARAMS0(offs, id)                         \
+       ((0x0000D0ull) + (((offs) & 1) + ((id) & 3) * 0x200000ull) * 8)
+#define CVMX_LMCX_DIMMX_DDR4_PARAMS1(offs, id)                         \
+       ((0x000140ull) + (((offs) & 1) + ((id) & 3) * 0x200000ull) * 8)
+#define CVMX_LMCX_DIMMX_PARAMS(offs, id)                               \
+       ((0x000270ull) + (((offs) & 1) + ((id) & 3) * 0x200000ull) * 8)
+#define CVMX_LMCX_DIMM_CTL(offs)                       \
+       ((0x000310ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_DLL_CTL(offs)                                \
+       ((0x0000C0ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_DLL_CTL2(offs)                       \
+       ((0x0001C8ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_DLL_CTL3(offs)                       \
+       ((0x000218ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_ECC_PARITY_TEST(offs)                        \
+       ((0x000108ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_EXT_CONFIG(offs)                     \
+       ((0x000030ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_EXT_CONFIG2(offs)                    \
+       ((0x000090ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_GENERAL_PURPOSE0(offs)               \
+       ((0x000340ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_GENERAL_PURPOSE1(offs)               \
+       ((0x000348ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_GENERAL_PURPOSE2(offs)               \
+       ((0x000350ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_IFB_CNT(offs)                                \
+       ((0x0001D0ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_IFB_CNT_HI(offs)                     \
+       ((0x000050ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_IFB_CNT_LO(offs)                     \
+       ((0x000048ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_INT(offs)                            \
+       ((0x0001F0ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_INT_EN(offs)                         \
+       ((0x0001E8ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_LANEX_CRC_SWIZ(offs, id)                                     \
+       ((0x000380ull) + (((offs) & 15) + ((id) & 3) * 0x200000ull) * 8)
+#define CVMX_LMCX_MEM_CFG0(offs)                       \
+       ((0x000000ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_MEM_CFG1(offs)                       \
+       ((0x000008ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_MODEREG_PARAMS0(offs)                        \
+       ((0x0001A8ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_MODEREG_PARAMS1(offs)                        \
+       ((0x000260ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_MODEREG_PARAMS2(offs)                        \
+       ((0x000050ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_MODEREG_PARAMS3(offs)                        \
+       ((0x000058ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_MPR_DATA0(offs)                      \
+       ((0x000070ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_MPR_DATA1(offs)                      \
+       ((0x000078ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_MPR_DATA2(offs)                      \
+       ((0x000080ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_MR_MPR_CTL(offs)                     \
+       ((0x000068ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_NS_CTL(offs)                         \
+       ((0x000178ull) + ((offs) & 3) * 0x1000000ull)
+
+static inline uint64_t CVMX_LMCX_NXM(unsigned long offs)
+{
+       switch (cvmx_get_octeon_family()) {
+       case OCTEON_CNF71XX & OCTEON_FAMILY_MASK:
+       case OCTEON_CN61XX & OCTEON_FAMILY_MASK:
+       case OCTEON_CN70XX & OCTEON_FAMILY_MASK:
+       case OCTEON_CN66XX & OCTEON_FAMILY_MASK:
+       case OCTEON_CN63XX & OCTEON_FAMILY_MASK:
+               return (0x0000C8ull) + (offs) * 0x60000000ull;
+       case OCTEON_CNF75XX & OCTEON_FAMILY_MASK:
+       case OCTEON_CN73XX & OCTEON_FAMILY_MASK:
+               return (0x0000C8ull) + (offs) * 0x1000000ull;
+       case OCTEON_CN78XX & OCTEON_FAMILY_MASK:
+               if (OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X))
+                       return (0x0000C8ull) + (offs) * 0x1000000ull;
+               if (OCTEON_IS_MODEL(OCTEON_CN78XX))
+                       return (0x0000C8ull) + (offs) * 0x1000000ull;
+       case OCTEON_CN68XX & OCTEON_FAMILY_MASK:
+               return (0x0000C8ull) + (offs) * 0x1000000ull;
+       }
+       return (0x0000C8ull) + (offs) * 0x1000000ull;
+}
+
+#define CVMX_LMCX_NXM_FADR(offs)                       \
+       ((0x000028ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_OPS_CNT(offs)                                \
+       ((0x0001D8ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_OPS_CNT_HI(offs)                     \
+       ((0x000060ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_OPS_CNT_LO(offs)                     \
+       ((0x000058ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_PHY_CTL(offs)                                \
+       ((0x000210ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_PHY_CTL2(offs)                       \
+       ((0x000250ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_PLL_BWCTL(offs)              \
+       ((0x000040ull))
+#define CVMX_LMCX_PLL_CTL(offs)                                \
+       ((0x0000A8ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_PLL_STATUS(offs)                     \
+       ((0x0000B0ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_PPR_CTL(offs)                                \
+       ((0x0003E0ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_READ_LEVEL_CTL(offs)                 \
+       ((0x000140ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_READ_LEVEL_DBG(offs)                 \
+       ((0x000148ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_READ_LEVEL_RANKX(offs, id)                           \
+       ((0x000100ull) + (((offs) & 3) + ((id) & 1) * 0xC000000ull) * 8)
+#define CVMX_LMCX_REF_STATUS(offs)                     \
+       ((0x0000A0ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_RESET_CTL(offs)                      \
+       ((0x000180ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_RETRY_CONFIG(offs)                   \
+       ((0x000110ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_RETRY_STATUS(offs)                   \
+       ((0x000118ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_RLEVEL_CTL(offs)                     \
+       ((0x0002A0ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_RLEVEL_DBG(offs)                     \
+       ((0x0002A8ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_RLEVEL_RANKX(offs, id)                               \
+       ((0x000280ull) + (((offs) & 3) + ((id) & 3) * 0x200000ull) * 8)
+#define CVMX_LMCX_RODT_COMP_CTL(offs)                  \
+       ((0x0000A0ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_RODT_CTL(offs)                       \
+       ((0x000078ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_RODT_MASK(offs)                      \
+       ((0x000268ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_SCRAMBLED_FADR(offs)                 \
+       ((0x000330ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_SCRAMBLE_CFG0(offs)                  \
+       ((0x000320ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_SCRAMBLE_CFG1(offs)                  \
+       ((0x000328ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_SCRAMBLE_CFG2(offs)                  \
+       ((0x000338ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_SEQ_CTL(offs)                                \
+       ((0x000048ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_SLOT_CTL0(offs)                      \
+       ((0x0001F8ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_SLOT_CTL1(offs)                      \
+       ((0x000200ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_SLOT_CTL2(offs)                      \
+       ((0x000208ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_SLOT_CTL3(offs)                      \
+       ((0x000248ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_TIMING_PARAMS0(offs)                 \
+       ((0x000198ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_TIMING_PARAMS1(offs)                 \
+       ((0x0001A0ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_TIMING_PARAMS2(offs)                 \
+       ((0x000060ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_TRO_CTL(offs)                                \
+       ((0x000248ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_TRO_STAT(offs)                       \
+       ((0x000250ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_WLEVEL_CTL(offs)                     \
+       ((0x000300ull) + ((offs) & 3) * 0x1000000ull)
+#define CVMX_LMCX_WLEVEL_DBG(offs)                     \
+       ((0x000308ull) + ((offs) & 3) * 0x1000000ull)
+
+static inline uint64_t CVMX_LMCX_WLEVEL_RANKX(unsigned long offs,
+                                             unsigned long id)
+{
+       switch (cvmx_get_octeon_family()) {
+       case OCTEON_CN70XX & OCTEON_FAMILY_MASK:
+               return (0x0002C0ull) + ((offs) + (id) * 0x200000ull) * 8;
+       case OCTEON_CNF75XX & OCTEON_FAMILY_MASK:
+       case OCTEON_CN73XX & OCTEON_FAMILY_MASK:
+               return (0x0002C0ull) + ((offs) + (id) * 0x200000ull) * 8;
+       case OCTEON_CN78XX & OCTEON_FAMILY_MASK:
+               if (OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X))
+                       return (0x0002C0ull) + ((offs) +
+                                               (id) * 0x200000ull) * 8;
+               if (OCTEON_IS_MODEL(OCTEON_CN78XX))
+                       return (0x0002C0ull) + ((offs) +
+                                               (id) * 0x200000ull) * 8;
+
+       case OCTEON_CN66XX & OCTEON_FAMILY_MASK:
+       case OCTEON_CN63XX & OCTEON_FAMILY_MASK:
+               return (0x0002B0ull) + ((offs) + (id) * 0x0ull) * 8;
+       case OCTEON_CNF71XX & OCTEON_FAMILY_MASK:
+       case OCTEON_CN61XX & OCTEON_FAMILY_MASK:
+               return (0x0002B0ull) + ((offs) + (id) * 0x200000ull) * 8;
+       case OCTEON_CN68XX & OCTEON_FAMILY_MASK:
+               return (0x0002B0ull) + ((offs) + (id) * 0x200000ull) * 8;
+       }
+       return (0x0002C0ull) + ((offs) + (id) * 0x200000ull) * 8;
+}
+
+#define CVMX_LMCX_WODT_CTL0(offs)                      \
+       ((0x000030ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_WODT_CTL1(offs)                      \
+       ((0x000080ull) + ((offs) & 1) * 0x60000000ull)
+#define CVMX_LMCX_WODT_MASK(offs)                      \
+       ((0x0001B0ull) + ((offs) & 3) * 0x1000000ull)
+
+/**
+ * cvmx_lmc#_char_ctl
+ *
+ * This register provides an assortment of various control fields needed
+ * to characterize the DDR3 interface.
+ */
+union cvmx_lmcx_char_ctl {
+       u64 u64;
+       struct cvmx_lmcx_char_ctl_s {
+               uint64_t reserved_54_63:10;
+               uint64_t dq_char_byte_check:1;
+               uint64_t dq_char_check_lock:1;
+               uint64_t dq_char_check_enable:1;
+               uint64_t dq_char_bit_sel:3;
+               uint64_t dq_char_byte_sel:4;
+               uint64_t dr:1;
+               uint64_t skew_on:1;
+               uint64_t en:1;
+               uint64_t sel:1;
+               uint64_t prog:8;
+               uint64_t prbs:32;
+       } s;
+       struct cvmx_lmcx_char_ctl_cn61xx {
+               uint64_t reserved_44_63:20;
+               uint64_t dr:1;
+               uint64_t skew_on:1;
+               uint64_t en:1;
+               uint64_t sel:1;
+               uint64_t prog:8;
+               uint64_t prbs:32;
+       } cn61xx;
+       struct cvmx_lmcx_char_ctl_cn63xx {
+               uint64_t reserved_42_63:22;
+               uint64_t en:1;
+               uint64_t sel:1;
+               uint64_t prog:8;
+               uint64_t prbs:32;
+       } cn63xx;
+       struct cvmx_lmcx_char_ctl_cn63xx cn63xxp1;
+       struct cvmx_lmcx_char_ctl_cn61xx cn66xx;
+       struct cvmx_lmcx_char_ctl_cn61xx cn68xx;
+       struct cvmx_lmcx_char_ctl_cn63xx cn68xxp1;
+       struct cvmx_lmcx_char_ctl_cn70xx {
+               uint64_t reserved_53_63:11;
+               uint64_t dq_char_check_lock:1;
+               uint64_t dq_char_check_enable:1;
+               uint64_t dq_char_bit_sel:3;
+               uint64_t dq_char_byte_sel:4;
+               uint64_t dr:1;
+               uint64_t skew_on:1;
+               uint64_t en:1;
+               uint64_t sel:1;
+               uint64_t prog:8;
+               uint64_t prbs:32;
+       } cn70xx;
+       struct cvmx_lmcx_char_ctl_cn70xx cn70xxp1;
+       struct cvmx_lmcx_char_ctl_s cn73xx;
+       struct cvmx_lmcx_char_ctl_s cn78xx;
+       struct cvmx_lmcx_char_ctl_s cn78xxp1;
+       struct cvmx_lmcx_char_ctl_cn61xx cnf71xx;
+       struct cvmx_lmcx_char_ctl_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_comp_ctl2
+ *
+ * LMC_COMP_CTL2 = LMC Compensation control
+ *
+ */
+union cvmx_lmcx_comp_ctl2 {
+       u64 u64;
+       struct cvmx_lmcx_comp_ctl2_s {
+               uint64_t reserved_51_63:13;
+               uint64_t rclk_char_mode:1;
+               uint64_t reserved_40_49:10;
+               uint64_t ptune_offset:4;
+               uint64_t reserved_12_35:24;
+               uint64_t cmd_ctl:4;
+               uint64_t ck_ctl:4;
+               uint64_t dqx_ctl:4;
+       } s;
+       struct cvmx_lmcx_comp_ctl2_cn61xx {
+               uint64_t reserved_34_63:30;
+               uint64_t ddr__ptune:4;
+               uint64_t ddr__ntune:4;
+               uint64_t m180:1;
+               uint64_t byp:1;
+               uint64_t ptune:4;
+               uint64_t ntune:4;
+               uint64_t rodt_ctl:4;
+               uint64_t cmd_ctl:4;
+               uint64_t ck_ctl:4;
+               uint64_t dqx_ctl:4;
+       } cn61xx;
+       struct cvmx_lmcx_comp_ctl2_cn61xx cn63xx;
+       struct cvmx_lmcx_comp_ctl2_cn61xx cn63xxp1;
+       struct cvmx_lmcx_comp_ctl2_cn61xx cn66xx;
+       struct cvmx_lmcx_comp_ctl2_cn61xx cn68xx;
+       struct cvmx_lmcx_comp_ctl2_cn61xx cn68xxp1;
+       struct cvmx_lmcx_comp_ctl2_cn70xx {
+               uint64_t reserved_51_63:13;
+               uint64_t rclk_char_mode:1;
+               uint64_t ddr__ptune:5;
+               uint64_t ddr__ntune:5;
+               uint64_t ptune_offset:4;
+               uint64_t ntune_offset:4;
+               uint64_t m180:1;
+               uint64_t byp:1;
+               uint64_t ptune:5;
+               uint64_t ntune:5;
+               uint64_t rodt_ctl:4;
+               uint64_t control_ctl:4;
+               uint64_t cmd_ctl:4;
+               uint64_t ck_ctl:4;
+               uint64_t dqx_ctl:4;
+       } cn70xx;
+       struct cvmx_lmcx_comp_ctl2_cn70xx cn70xxp1;
+       struct cvmx_lmcx_comp_ctl2_cn70xx cn73xx;
+       struct cvmx_lmcx_comp_ctl2_cn70xx cn78xx;
+       struct cvmx_lmcx_comp_ctl2_cn70xx cn78xxp1;
+       struct cvmx_lmcx_comp_ctl2_cn61xx cnf71xx;
+       struct cvmx_lmcx_comp_ctl2_cn70xx cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_config
+ *
+ * This register controls certain parameters required for memory configuration.
+ * Note the following:
+ * * Priority order for hardware write operations to
+ * LMC()_CONFIG/LMC()_FADR/LMC()_ECC_SYND: DED error > SEC error.
+ * * The self-refresh entry sequence(s) power the DLL up/down (depending on
+ * LMC()_MODEREG_PARAMS0[DLL]) when LMC()_CONFIG[SREF_WITH_DLL] is set.
+ * * Prior to the self-refresh exit sequence, LMC()_MODEREG_PARAMS0 should be
+ * reprogrammed (if needed) to the appropriate values.
+ *
+ * See LMC initialization sequence for the LMC bringup sequence.
+ */
+union cvmx_lmcx_config {
+       u64 u64;
+       struct cvmx_lmcx_config_s {
+               uint64_t lrdimm_ena:1;
+               uint64_t bg2_enable:1;
+               uint64_t mode_x4dev:1;
+               uint64_t mode32b:1;
+               uint64_t scrz:1;
+               uint64_t early_unload_d1_r1:1;
+               uint64_t early_unload_d1_r0:1;
+               uint64_t early_unload_d0_r1:1;
+               uint64_t early_unload_d0_r0:1;
+               uint64_t init_status:4;
+               uint64_t mirrmask:4;
+               uint64_t rankmask:4;
+               uint64_t rank_ena:1;
+               uint64_t sref_with_dll:1;
+               uint64_t early_dqx:1;
+               uint64_t reserved_18_39:22;
+               uint64_t reset:1;
+               uint64_t ecc_adr:1;
+               uint64_t forcewrite:4;
+               uint64_t idlepower:3;
+               uint64_t pbank_lsb:4;
+               uint64_t row_lsb:3;
+               uint64_t ecc_ena:1;
+               uint64_t init_start:1;
+       } s;
+       struct cvmx_lmcx_config_cn61xx {
+               uint64_t reserved_61_63:3;
+               uint64_t mode32b:1;
+               uint64_t scrz:1;
+               uint64_t early_unload_d1_r1:1;
+               uint64_t early_unload_d1_r0:1;
+               uint64_t early_unload_d0_r1:1;
+               uint64_t early_unload_d0_r0:1;
+               uint64_t init_status:4;
+               uint64_t mirrmask:4;
+               uint64_t rankmask:4;
+               uint64_t rank_ena:1;
+               uint64_t sref_with_dll:1;
+               uint64_t early_dqx:1;
+               uint64_t sequence:3;
+               uint64_t ref_zqcs_int:19;
+               uint64_t reset:1;
+               uint64_t ecc_adr:1;
+               uint64_t forcewrite:4;
+               uint64_t idlepower:3;
+               uint64_t pbank_lsb:4;
+               uint64_t row_lsb:3;
+               uint64_t ecc_ena:1;
+               uint64_t init_start:1;
+       } cn61xx;
+       struct cvmx_lmcx_config_cn63xx {
+               uint64_t reserved_59_63:5;
+               uint64_t early_unload_d1_r1:1;
+               uint64_t early_unload_d1_r0:1;
+               uint64_t early_unload_d0_r1:1;
+               uint64_t early_unload_d0_r0:1;
+               uint64_t init_status:4;
+               uint64_t mirrmask:4;
+               uint64_t rankmask:4;
+               uint64_t rank_ena:1;
+               uint64_t sref_with_dll:1;
+               uint64_t early_dqx:1;
+               uint64_t sequence:3;
+               uint64_t ref_zqcs_int:19;
+               uint64_t reset:1;
+               uint64_t ecc_adr:1;
+               uint64_t forcewrite:4;
+               uint64_t idlepower:3;
+               uint64_t pbank_lsb:4;
+               uint64_t row_lsb:3;
+               uint64_t ecc_ena:1;
+               uint64_t init_start:1;
+       } cn63xx;
+       struct cvmx_lmcx_config_cn63xxp1 {
+               uint64_t reserved_55_63:9;
+               uint64_t init_status:4;
+               uint64_t mirrmask:4;
+               uint64_t rankmask:4;
+               uint64_t rank_ena:1;
+               uint64_t sref_with_dll:1;
+               uint64_t early_dqx:1;
+               uint64_t sequence:3;
+               uint64_t ref_zqcs_int:19;
+               uint64_t reset:1;
+               uint64_t ecc_adr:1;
+               uint64_t forcewrite:4;
+               uint64_t idlepower:3;
+               uint64_t pbank_lsb:4;
+               uint64_t row_lsb:3;
+               uint64_t ecc_ena:1;
+               uint64_t init_start:1;
+       } cn63xxp1;
+       struct cvmx_lmcx_config_cn66xx {
+               uint64_t reserved_60_63:4;
+               uint64_t scrz:1;
+               uint64_t early_unload_d1_r1:1;
+               uint64_t early_unload_d1_r0:1;
+               uint64_t early_unload_d0_r1:1;
+               uint64_t early_unload_d0_r0:1;
+               uint64_t init_status:4;
+               uint64_t mirrmask:4;
+               uint64_t rankmask:4;
+               uint64_t rank_ena:1;
+               uint64_t sref_with_dll:1;
+               uint64_t early_dqx:1;
+               uint64_t sequence:3;
+               uint64_t ref_zqcs_int:19;
+               uint64_t reset:1;
+               uint64_t ecc_adr:1;
+               uint64_t forcewrite:4;
+               uint64_t idlepower:3;
+               uint64_t pbank_lsb:4;
+               uint64_t row_lsb:3;
+               uint64_t ecc_ena:1;
+               uint64_t init_start:1;
+       } cn66xx;
+       struct cvmx_lmcx_config_cn63xx cn68xx;
+       struct cvmx_lmcx_config_cn63xx cn68xxp1;
+       struct cvmx_lmcx_config_cn70xx {
+               uint64_t reserved_63_63:1;
+               uint64_t bg2_enable:1;
+               uint64_t mode_x4dev:1;
+               uint64_t mode32b:1;
+               uint64_t scrz:1;
+               uint64_t early_unload_d1_r1:1;
+               uint64_t early_unload_d1_r0:1;
+               uint64_t early_unload_d0_r1:1;
+               uint64_t early_unload_d0_r0:1;
+               uint64_t init_status:4;
+               uint64_t mirrmask:4;
+               uint64_t rankmask:4;
+               uint64_t rank_ena:1;
+               uint64_t sref_with_dll:1;
+               uint64_t early_dqx:1;
+               uint64_t ref_zqcs_int:22;
+               uint64_t reset:1;
+               uint64_t ecc_adr:1;
+               uint64_t forcewrite:4;
+               uint64_t idlepower:3;
+               uint64_t pbank_lsb:4;
+               uint64_t row_lsb:3;
+               uint64_t ecc_ena:1;
+               uint64_t reserved_0_0:1;
+       } cn70xx;
+       struct cvmx_lmcx_config_cn70xx cn70xxp1;
+       struct cvmx_lmcx_config_cn73xx {
+               uint64_t lrdimm_ena:1;
+               uint64_t bg2_enable:1;
+               uint64_t mode_x4dev:1;
+               uint64_t mode32b:1;
+               uint64_t scrz:1;
+               uint64_t early_unload_d1_r1:1;
+               uint64_t early_unload_d1_r0:1;
+               uint64_t early_unload_d0_r1:1;
+               uint64_t early_unload_d0_r0:1;
+               uint64_t init_status:4;
+               uint64_t mirrmask:4;
+               uint64_t rankmask:4;
+               uint64_t rank_ena:1;
+               uint64_t sref_with_dll:1;
+               uint64_t early_dqx:1;
+               uint64_t ref_zqcs_int:22;
+               uint64_t reset:1;
+               uint64_t ecc_adr:1;
+               uint64_t forcewrite:4;
+               uint64_t idlepower:3;
+               uint64_t pbank_lsb:4;
+               uint64_t row_lsb:3;
+               uint64_t ecc_ena:1;
+               uint64_t reserved_0_0:1;
+       } cn73xx;
+       struct cvmx_lmcx_config_cn73xx cn78xx;
+       struct cvmx_lmcx_config_cn73xx cn78xxp1;
+       struct cvmx_lmcx_config_cn61xx cnf71xx;
+       struct cvmx_lmcx_config_cn73xx cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_control
+ *
+ * LMC_CONTROL = LMC Control
+ * This register is an assortment of various control fields needed by the
+ * memory controller
+ */
+union cvmx_lmcx_control {
+       u64 u64;
+       struct cvmx_lmcx_control_s {
+               uint64_t scramble_ena:1;
+               uint64_t thrcnt:12;
+               uint64_t persub:8;
+               uint64_t thrmax:4;
+               uint64_t crm_cnt:5;
+               uint64_t crm_thr:5;
+               uint64_t crm_max:5;
+               uint64_t rodt_bprch:1;
+               uint64_t wodt_bprch:1;
+               uint64_t bprch:2;
+               uint64_t ext_zqcs_dis:1;
+               uint64_t int_zqcs_dis:1;
+               uint64_t auto_dclkdis:1;
+               uint64_t xor_bank:1;
+               uint64_t max_write_batch:4;
+               uint64_t nxm_write_en:1;
+               uint64_t elev_prio_dis:1;
+               uint64_t inorder_wr:1;
+               uint64_t inorder_rd:1;
+               uint64_t throttle_wr:1;
+               uint64_t throttle_rd:1;
+               uint64_t fprch2:2;
+               uint64_t pocas:1;
+               uint64_t ddr2t:1;
+               uint64_t bwcnt:1;
+               uint64_t rdimm_ena:1;
+       } s;
+       struct cvmx_lmcx_control_s cn61xx;
+       struct cvmx_lmcx_control_cn63xx {
+               uint64_t reserved_24_63:40;
+               uint64_t rodt_bprch:1;
+               uint64_t wodt_bprch:1;
+               uint64_t bprch:2;
+               uint64_t ext_zqcs_dis:1;
+               uint64_t int_zqcs_dis:1;
+               uint64_t auto_dclkdis:1;
+               uint64_t xor_bank:1;
+               uint64_t max_write_batch:4;
+               uint64_t nxm_write_en:1;
+               uint64_t elev_prio_dis:1;
+               uint64_t inorder_wr:1;
+               uint64_t inorder_rd:1;
+               uint64_t throttle_wr:1;
+               uint64_t throttle_rd:1;
+               uint64_t fprch2:2;
+               uint64_t pocas:1;
+               uint64_t ddr2t:1;
+               uint64_t bwcnt:1;
+               uint64_t rdimm_ena:1;
+       } cn63xx;
+       struct cvmx_lmcx_control_cn63xx cn63xxp1;
+       struct cvmx_lmcx_control_cn66xx {
+               uint64_t scramble_ena:1;
+               uint64_t reserved_24_62:39;
+               uint64_t rodt_bprch:1;
+               uint64_t wodt_bprch:1;
+               uint64_t bprch:2;
+               uint64_t ext_zqcs_dis:1;
+               uint64_t int_zqcs_dis:1;
+               uint64_t auto_dclkdis:1;
+               uint64_t xor_bank:1;
+               uint64_t max_write_batch:4;
+               uint64_t nxm_write_en:1;
+               uint64_t elev_prio_dis:1;
+               uint64_t inorder_wr:1;
+               uint64_t inorder_rd:1;
+               uint64_t throttle_wr:1;
+               uint64_t throttle_rd:1;
+               uint64_t fprch2:2;
+               uint64_t pocas:1;
+               uint64_t ddr2t:1;
+               uint64_t bwcnt:1;
+               uint64_t rdimm_ena:1;
+       } cn66xx;
+       struct cvmx_lmcx_control_cn68xx {
+               uint64_t reserved_63_63:1;
+               uint64_t thrcnt:12;
+               uint64_t persub:8;
+               uint64_t thrmax:4;
+               uint64_t crm_cnt:5;
+               uint64_t crm_thr:5;
+               uint64_t crm_max:5;
+               uint64_t rodt_bprch:1;
+               uint64_t wodt_bprch:1;
+               uint64_t bprch:2;
+               uint64_t ext_zqcs_dis:1;
+               uint64_t int_zqcs_dis:1;
+               uint64_t auto_dclkdis:1;
+               uint64_t xor_bank:1;
+               uint64_t max_write_batch:4;
+               uint64_t nxm_write_en:1;
+               uint64_t elev_prio_dis:1;
+               uint64_t inorder_wr:1;
+               uint64_t inorder_rd:1;
+               uint64_t throttle_wr:1;
+               uint64_t throttle_rd:1;
+               uint64_t fprch2:2;
+               uint64_t pocas:1;
+               uint64_t ddr2t:1;
+               uint64_t bwcnt:1;
+               uint64_t rdimm_ena:1;
+       } cn68xx;
+       struct cvmx_lmcx_control_cn68xx cn68xxp1;
+       struct cvmx_lmcx_control_s cn70xx;
+       struct cvmx_lmcx_control_s cn70xxp1;
+       struct cvmx_lmcx_control_s cn73xx;
+       struct cvmx_lmcx_control_s cn78xx;
+       struct cvmx_lmcx_control_s cn78xxp1;
+       struct cvmx_lmcx_control_cn66xx cnf71xx;
+       struct cvmx_lmcx_control_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_ctl
+ *
+ * LMC_CTL = LMC Control
+ * This register is an assortment of various control fields needed by the
+ * memory controller
+ */
+union cvmx_lmcx_ctl {
+       u64 u64;
+       struct cvmx_lmcx_ctl_s {
+               uint64_t reserved_32_63:32;
+               uint64_t ddr__nctl:4;
+               uint64_t ddr__pctl:4;
+               uint64_t slow_scf:1;
+               uint64_t xor_bank:1;
+               uint64_t max_write_batch:4;
+               uint64_t pll_div2:1;
+               uint64_t pll_bypass:1;
+               uint64_t rdimm_ena:1;
+               uint64_t r2r_slot:1;
+               uint64_t inorder_mwf:1;
+               uint64_t inorder_mrf:1;
+               uint64_t reserved_10_11:2;
+               uint64_t fprch2:1;
+               uint64_t bprch:1;
+               uint64_t sil_lat:2;
+               uint64_t tskw:2;
+               uint64_t qs_dic:2;
+               uint64_t dic:2;
+       } s;
+       struct cvmx_lmcx_ctl_cn30xx {
+               uint64_t reserved_32_63:32;
+               uint64_t ddr__nctl:4;
+               uint64_t ddr__pctl:4;
+               uint64_t slow_scf:1;
+               uint64_t xor_bank:1;
+               uint64_t max_write_batch:4;
+               uint64_t pll_div2:1;
+               uint64_t pll_bypass:1;
+               uint64_t rdimm_ena:1;
+               uint64_t r2r_slot:1;
+               uint64_t inorder_mwf:1;
+               uint64_t inorder_mrf:1;
+               uint64_t dreset:1;
+               uint64_t mode32b:1;
+               uint64_t fprch2:1;
+               uint64_t bprch:1;
+               uint64_t sil_lat:2;
+               uint64_t tskw:2;
+               uint64_t qs_dic:2;
+               uint64_t dic:2;
+       } cn30xx;
+       struct cvmx_lmcx_ctl_cn30xx cn31xx;
+       struct cvmx_lmcx_ctl_cn38xx {
+               uint64_t reserved_32_63:32;
+               uint64_t ddr__nctl:4;
+               uint64_t ddr__pctl:4;
+               uint64_t slow_scf:1;
+               uint64_t xor_bank:1;
+               uint64_t max_write_batch:4;
+               uint64_t reserved_16_17:2;
+               uint64_t rdimm_ena:1;
+               uint64_t r2r_slot:1;
+               uint64_t inorder_mwf:1;
+               uint64_t inorder_mrf:1;
+               uint64_t set_zero:1;
+               uint64_t mode128b:1;
+               uint64_t fprch2:1;
+               uint64_t bprch:1;
+               uint64_t sil_lat:2;
+               uint64_t tskw:2;
+               uint64_t qs_dic:2;
+               uint64_t dic:2;
+       } cn38xx;
+       struct cvmx_lmcx_ctl_cn38xx cn38xxp2;
+       struct cvmx_lmcx_ctl_cn50xx {
+               uint64_t reserved_32_63:32;
+               uint64_t ddr__nctl:4;
+               uint64_t ddr__pctl:4;
+               uint64_t slow_scf:1;
+               uint64_t xor_bank:1;
+               uint64_t max_write_batch:4;
+               uint64_t reserved_17_17:1;
+               uint64_t pll_bypass:1;
+               uint64_t rdimm_ena:1;
+               uint64_t r2r_slot:1;
+               uint64_t inorder_mwf:1;
+               uint64_t inorder_mrf:1;
+               uint64_t dreset:1;
+               uint64_t mode32b:1;
+               uint64_t fprch2:1;
+               uint64_t bprch:1;
+               uint64_t sil_lat:2;
+               uint64_t tskw:2;
+               uint64_t qs_dic:2;
+               uint64_t dic:2;
+       } cn50xx;
+       struct cvmx_lmcx_ctl_cn52xx {
+               uint64_t reserved_32_63:32;
+               uint64_t ddr__nctl:4;
+               uint64_t ddr__pctl:4;
+               uint64_t slow_scf:1;
+               uint64_t xor_bank:1;
+               uint64_t max_write_batch:4;
+               uint64_t reserved_16_17:2;
+               uint64_t rdimm_ena:1;
+               uint64_t r2r_slot:1;
+               uint64_t inorder_mwf:1;
+               uint64_t inorder_mrf:1;
+               uint64_t dreset:1;
+               uint64_t mode32b:1;
+               uint64_t fprch2:1;
+               uint64_t bprch:1;
+               uint64_t sil_lat:2;
+               uint64_t tskw:2;
+               uint64_t qs_dic:2;
+               uint64_t dic:2;
+       } cn52xx;
+       struct cvmx_lmcx_ctl_cn52xx cn52xxp1;
+       struct cvmx_lmcx_ctl_cn52xx cn56xx;
+       struct cvmx_lmcx_ctl_cn52xx cn56xxp1;
+       struct cvmx_lmcx_ctl_cn58xx {
+               uint64_t reserved_32_63:32;
+               uint64_t ddr__nctl:4;
+               uint64_t ddr__pctl:4;
+               uint64_t slow_scf:1;
+               uint64_t xor_bank:1;
+               uint64_t max_write_batch:4;
+               uint64_t reserved_16_17:2;
+               uint64_t rdimm_ena:1;
+               uint64_t r2r_slot:1;
+               uint64_t inorder_mwf:1;
+               uint64_t inorder_mrf:1;
+               uint64_t dreset:1;
+               uint64_t mode128b:1;
+               uint64_t fprch2:1;
+               uint64_t bprch:1;
+               uint64_t sil_lat:2;
+               uint64_t tskw:2;
+               uint64_t qs_dic:2;
+               uint64_t dic:2;
+       } cn58xx;
+       struct cvmx_lmcx_ctl_cn58xx cn58xxp1;
+};
+
+/**
+ * cvmx_lmc#_ctl1
+ *
+ * LMC_CTL1 = LMC Control1
+ * This register is an assortment of various control fields needed by the
+ * memory controller
+ */
+union cvmx_lmcx_ctl1 {
+       u64 u64;
+       struct cvmx_lmcx_ctl1_s {
+               uint64_t reserved_21_63:43;
+               uint64_t ecc_adr:1;
+               uint64_t forcewrite:4;
+               uint64_t idlepower:3;
+               uint64_t sequence:3;
+               uint64_t sil_mode:1;
+               uint64_t dcc_enable:1;
+               uint64_t reserved_2_7:6;
+               uint64_t data_layout:2;
+       } s;
+       struct cvmx_lmcx_ctl1_cn30xx {
+               uint64_t reserved_2_63:62;
+               uint64_t data_layout:2;
+       } cn30xx;
+       struct cvmx_lmcx_ctl1_cn50xx {
+               uint64_t reserved_10_63:54;
+               uint64_t sil_mode:1;
+               uint64_t dcc_enable:1;
+               uint64_t reserved_2_7:6;
+               uint64_t data_layout:2;
+       } cn50xx;
+       struct cvmx_lmcx_ctl1_cn52xx {
+               uint64_t reserved_21_63:43;
+               uint64_t ecc_adr:1;
+               uint64_t forcewrite:4;
+               uint64_t idlepower:3;
+               uint64_t sequence:3;
+               uint64_t sil_mode:1;
+               uint64_t dcc_enable:1;
+               uint64_t reserved_0_7:8;
+       } cn52xx;
+       struct cvmx_lmcx_ctl1_cn52xx cn52xxp1;
+       struct cvmx_lmcx_ctl1_cn52xx cn56xx;
+       struct cvmx_lmcx_ctl1_cn52xx cn56xxp1;
+       struct cvmx_lmcx_ctl1_cn58xx {
+               uint64_t reserved_10_63:54;
+               uint64_t sil_mode:1;
+               uint64_t dcc_enable:1;
+               uint64_t reserved_0_7:8;
+       } cn58xx;
+       struct cvmx_lmcx_ctl1_cn58xx cn58xxp1;
+};
+
+/**
+ * cvmx_lmc#_dbtrain_ctl
+ *
+ * Reserved.
+ *
+ */
+union cvmx_lmcx_dbtrain_ctl {
+       u64 u64;
+       struct cvmx_lmcx_dbtrain_ctl_s {
+               uint64_t reserved_63_63:1;
+               uint64_t lfsr_pattern_sel:1;
+               uint64_t cmd_count_ext:2;
+               uint64_t db_output_impedance:3;
+               uint64_t db_sel:1;
+               uint64_t tccd_sel:1;
+               uint64_t rw_train:1;
+               uint64_t read_dq_count:7;
+               uint64_t read_cmd_count:5;
+               uint64_t write_ena:1;
+               uint64_t activate:1;
+               uint64_t prank:2;
+               uint64_t lrank:3;
+               uint64_t row_a:18;
+               uint64_t bg:2;
+               uint64_t ba:2;
+               uint64_t column_a:13;
+       } s;
+       struct cvmx_lmcx_dbtrain_ctl_cn73xx {
+               uint64_t reserved_60_63:4;
+               uint64_t db_output_impedance:3;
+               uint64_t db_sel:1;
+               uint64_t tccd_sel:1;
+               uint64_t rw_train:1;
+               uint64_t read_dq_count:7;
+               uint64_t read_cmd_count:5;
+               uint64_t write_ena:1;
+               uint64_t activate:1;
+               uint64_t prank:2;
+               uint64_t lrank:3;
+               uint64_t row_a:18;
+               uint64_t bg:2;
+               uint64_t ba:2;
+               uint64_t column_a:13;
+       } cn73xx;
+       struct cvmx_lmcx_dbtrain_ctl_s cn78xx;
+       struct cvmx_lmcx_dbtrain_ctl_cnf75xx {
+               uint64_t reserved_62_63:2;
+               uint64_t cmd_count_ext:2;
+               uint64_t db_output_impedance:3;
+               uint64_t db_sel:1;
+               uint64_t tccd_sel:1;
+               uint64_t rw_train:1;
+               uint64_t read_dq_count:7;
+               uint64_t read_cmd_count:5;
+               uint64_t write_ena:1;
+               uint64_t activate:1;
+               uint64_t prank:2;
+               uint64_t lrank:3;
+               uint64_t row_a:18;
+               uint64_t bg:2;
+               uint64_t ba:2;
+               uint64_t column_a:13;
+       } cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_dclk_cnt
+ *
+ * LMC_DCLK_CNT  = Performance Counters
+ *
+ */
+union cvmx_lmcx_dclk_cnt {
+       u64 u64;
+       struct cvmx_lmcx_dclk_cnt_s {
+               uint64_t dclkcnt:64;
+       } s;
+       struct cvmx_lmcx_dclk_cnt_s cn61xx;
+       struct cvmx_lmcx_dclk_cnt_s cn63xx;
+       struct cvmx_lmcx_dclk_cnt_s cn63xxp1;
+       struct cvmx_lmcx_dclk_cnt_s cn66xx;
+       struct cvmx_lmcx_dclk_cnt_s cn68xx;
+       struct cvmx_lmcx_dclk_cnt_s cn68xxp1;
+       struct cvmx_lmcx_dclk_cnt_s cn70xx;
+       struct cvmx_lmcx_dclk_cnt_s cn70xxp1;
+       struct cvmx_lmcx_dclk_cnt_s cn73xx;
+       struct cvmx_lmcx_dclk_cnt_s cn78xx;
+       struct cvmx_lmcx_dclk_cnt_s cn78xxp1;
+       struct cvmx_lmcx_dclk_cnt_s cnf71xx;
+       struct cvmx_lmcx_dclk_cnt_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_dclk_cnt_hi
+ *
+ * LMC_DCLK_CNT_HI  = Performance Counters
+ *
+ */
+union cvmx_lmcx_dclk_cnt_hi {
+       u64 u64;
+       struct cvmx_lmcx_dclk_cnt_hi_s {
+               uint64_t reserved_32_63:32;
+               uint64_t dclkcnt_hi:32;
+       } s;
+       struct cvmx_lmcx_dclk_cnt_hi_s cn30xx;
+       struct cvmx_lmcx_dclk_cnt_hi_s cn31xx;
+       struct cvmx_lmcx_dclk_cnt_hi_s cn38xx;
+       struct cvmx_lmcx_dclk_cnt_hi_s cn38xxp2;
+       struct cvmx_lmcx_dclk_cnt_hi_s cn50xx;
+       struct cvmx_lmcx_dclk_cnt_hi_s cn52xx;
+       struct cvmx_lmcx_dclk_cnt_hi_s cn52xxp1;
+       struct cvmx_lmcx_dclk_cnt_hi_s cn56xx;
+       struct cvmx_lmcx_dclk_cnt_hi_s cn56xxp1;
+       struct cvmx_lmcx_dclk_cnt_hi_s cn58xx;
+       struct cvmx_lmcx_dclk_cnt_hi_s cn58xxp1;
+};
+
+/**
+ * cvmx_lmc#_dclk_cnt_lo
+ *
+ * LMC_DCLK_CNT_LO  = Performance Counters
+ *
+ */
+union cvmx_lmcx_dclk_cnt_lo {
+       u64 u64;
+       struct cvmx_lmcx_dclk_cnt_lo_s {
+               uint64_t reserved_32_63:32;
+               uint64_t dclkcnt_lo:32;
+       } s;
+       struct cvmx_lmcx_dclk_cnt_lo_s cn30xx;
+       struct cvmx_lmcx_dclk_cnt_lo_s cn31xx;
+       struct cvmx_lmcx_dclk_cnt_lo_s cn38xx;
+       struct cvmx_lmcx_dclk_cnt_lo_s cn38xxp2;
+       struct cvmx_lmcx_dclk_cnt_lo_s cn50xx;
+       struct cvmx_lmcx_dclk_cnt_lo_s cn52xx;
+       struct cvmx_lmcx_dclk_cnt_lo_s cn52xxp1;
+       struct cvmx_lmcx_dclk_cnt_lo_s cn56xx;
+       struct cvmx_lmcx_dclk_cnt_lo_s cn56xxp1;
+       struct cvmx_lmcx_dclk_cnt_lo_s cn58xx;
+       struct cvmx_lmcx_dclk_cnt_lo_s cn58xxp1;
+};
+
+/**
+ * cvmx_lmc#_dclk_ctl
+ *
+ * LMC_DCLK_CTL = LMC DCLK generation control
+ *
+ *
+ * Notes:
+ * This CSR is only relevant for LMC1. LMC0_DCLK_CTL is not used.
+ *
+ */
+union cvmx_lmcx_dclk_ctl {
+       u64 u64;
+       struct cvmx_lmcx_dclk_ctl_s {
+               uint64_t reserved_8_63:56;
+               uint64_t off90_ena:1;
+               uint64_t dclk90_byp:1;
+               uint64_t dclk90_ld:1;
+               uint64_t dclk90_vlu:5;
+       } s;
+       struct cvmx_lmcx_dclk_ctl_s cn56xx;
+       struct cvmx_lmcx_dclk_ctl_s cn56xxp1;
+};
+
+/**
+ * cvmx_lmc#_ddr2_ctl
+ *
+ * LMC_DDR2_CTL = LMC DDR2 & DLL Control Register
+ *
+ */
+union cvmx_lmcx_ddr2_ctl {
+       u64 u64;
+       struct cvmx_lmcx_ddr2_ctl_s {
+               uint64_t reserved_32_63:32;
+               uint64_t bank8:1;
+               uint64_t burst8:1;
+               uint64_t addlat:3;
+               uint64_t pocas:1;
+               uint64_t bwcnt:1;
+               uint64_t twr:3;
+               uint64_t silo_hc:1;
+               uint64_t ddr_eof:4;
+               uint64_t tfaw:5;
+               uint64_t crip_mode:1;
+               uint64_t ddr2t:1;
+               uint64_t odt_ena:1;
+               uint64_t qdll_ena:1;
+               uint64_t dll90_vlu:5;
+               uint64_t dll90_byp:1;
+               uint64_t rdqs:1;
+               uint64_t ddr2:1;
+       } s;
+       struct cvmx_lmcx_ddr2_ctl_cn30xx {
+               uint64_t reserved_32_63:32;
+               uint64_t bank8:1;
+               uint64_t burst8:1;
+               uint64_t addlat:3;
+               uint64_t pocas:1;
+               uint64_t bwcnt:1;
+               uint64_t twr:3;
+               uint64_t silo_hc:1;
+               uint64_t ddr_eof:4;
+               uint64_t tfaw:5;
+               uint64_t crip_mode:1;
+               uint64_t ddr2t:1;
+               uint64_t odt_ena:1;
+               uint64_t qdll_ena:1;
+               uint64_t dll90_vlu:5;
+               uint64_t dll90_byp:1;
+               uint64_t reserved_1_1:1;
+               uint64_t ddr2:1;
+       } cn30xx;
+       struct cvmx_lmcx_ddr2_ctl_cn30xx cn31xx;
+       struct cvmx_lmcx_ddr2_ctl_s cn38xx;
+       struct cvmx_lmcx_ddr2_ctl_s cn38xxp2;
+       struct cvmx_lmcx_ddr2_ctl_s cn50xx;
+       struct cvmx_lmcx_ddr2_ctl_s cn52xx;
+       struct cvmx_lmcx_ddr2_ctl_s cn52xxp1;
+       struct cvmx_lmcx_ddr2_ctl_s cn56xx;
+       struct cvmx_lmcx_ddr2_ctl_s cn56xxp1;
+       struct cvmx_lmcx_ddr2_ctl_s cn58xx;
+       struct cvmx_lmcx_ddr2_ctl_s cn58xxp1;
+};
+
+/**
+ * cvmx_lmc#_ddr4_dimm_ctl
+ *
+ * Bits 0-21 of this register are used only when LMC()_CONTROL[RDIMM_ENA] = 1.
+ *
+ * During an RCW initialization sequence, bits 0-21 control LMC's write
+ * operations to the extended DDR4 control words in the JEDEC standard
+ * registering clock driver on an RDIMM.
+ */
+union cvmx_lmcx_ddr4_dimm_ctl {
+       u64 u64;
+       struct cvmx_lmcx_ddr4_dimm_ctl_s {
+               uint64_t reserved_28_63:36;
+               uint64_t rank_timing_enable:1;
+               uint64_t bodt_trans_mode:1;
+               uint64_t trans_mode_ena:1;
+               uint64_t read_preamble_mode:1;
+               uint64_t buff_config_da3:1;
+               uint64_t mpr_over_ena:1;
+               uint64_t ddr4_dimm1_wmask:11;
+               uint64_t ddr4_dimm0_wmask:11;
+       } s;
+       struct cvmx_lmcx_ddr4_dimm_ctl_cn70xx {
+               uint64_t reserved_22_63:42;
+               uint64_t ddr4_dimm1_wmask:11;
+               uint64_t ddr4_dimm0_wmask:11;
+       } cn70xx;
+       struct cvmx_lmcx_ddr4_dimm_ctl_cn70xx cn70xxp1;
+       struct cvmx_lmcx_ddr4_dimm_ctl_s cn73xx;
+       struct cvmx_lmcx_ddr4_dimm_ctl_s cn78xx;
+       struct cvmx_lmcx_ddr4_dimm_ctl_s cn78xxp1;
+       struct cvmx_lmcx_ddr4_dimm_ctl_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_ddr_pll_ctl
+ *
+ * This register controls the DDR_CK frequency. For details, refer to CK
+ * speed programming. See the LMC initialization sequence for the bringup
+ * sequence.
+ * DDR PLL bringup sequence:
+ *
+ * 1. Write [CLKF], [CLKR], [DDR_PS_EN].
+ *
+ * 2. Wait 128 ref clock cycles (7680 core-clock cycles).
+ *
+ * 3. Write 1 to [RESET_N].
+ *
+ * 4. Wait 1152 ref clocks (1152*16 core-clock cycles).
+ *
+ * 5. Write 0 to [DDR_DIV_RESET].
+ *
+ * 6. Wait 10 ref clock cycles (160 core-clock cycles) before bringing up
+ * the DDR interface.
+ */
+union cvmx_lmcx_ddr_pll_ctl {
+       u64 u64;
+       struct cvmx_lmcx_ddr_pll_ctl_s {
+               uint64_t reserved_45_63:19;
+               uint64_t dclk_alt_refclk_sel:1;
+               uint64_t bwadj:12;
+               uint64_t dclk_invert:1;
+               uint64_t phy_dcok:1;
+               uint64_t ddr4_mode:1;
+               uint64_t pll_fbslip:1;
+               uint64_t pll_lock:1;
+               uint64_t reserved_18_26:9;
+               uint64_t diffamp:4;
+               uint64_t cps:3;
+               uint64_t reserved_8_10:3;
+               uint64_t reset_n:1;
+               uint64_t clkf:7;
+       } s;
+       struct cvmx_lmcx_ddr_pll_ctl_cn61xx {
+               uint64_t reserved_27_63:37;
+               uint64_t jtg_test_mode:1;
+               uint64_t dfm_div_reset:1;
+               uint64_t dfm_ps_en:3;
+               uint64_t ddr_div_reset:1;
+               uint64_t ddr_ps_en:3;
+               uint64_t diffamp:4;
+               uint64_t cps:3;
+               uint64_t cpb:3;
+               uint64_t reset_n:1;
+               uint64_t clkf:7;
+       } cn61xx;
+       struct cvmx_lmcx_ddr_pll_ctl_cn61xx cn63xx;
+       struct cvmx_lmcx_ddr_pll_ctl_cn61xx cn63xxp1;
+       struct cvmx_lmcx_ddr_pll_ctl_cn61xx cn66xx;
+       struct cvmx_lmcx_ddr_pll_ctl_cn61xx cn68xx;
+       struct cvmx_lmcx_ddr_pll_ctl_cn61xx cn68xxp1;
+       struct cvmx_lmcx_ddr_pll_ctl_cn70xx {
+               uint64_t reserved_31_63:33;
+               uint64_t phy_dcok:1;
+               uint64_t ddr4_mode:1;
+               uint64_t pll_fbslip:1;
+               uint64_t pll_lock:1;
+               uint64_t pll_rfslip:1;
+               uint64_t clkr:2;
+               uint64_t jtg_test_mode:1;
+               uint64_t ddr_div_reset:1;
+               uint64_t ddr_ps_en:4;
+               uint64_t reserved_8_17:10;
+               uint64_t reset_n:1;
+               uint64_t clkf:7;
+       } cn70xx;
+       struct cvmx_lmcx_ddr_pll_ctl_cn70xx cn70xxp1;
+       struct cvmx_lmcx_ddr_pll_ctl_cn73xx {
+               uint64_t reserved_45_63:19;
+               uint64_t dclk_alt_refclk_sel:1;
+               uint64_t bwadj:12;
+               uint64_t dclk_invert:1;
+               uint64_t phy_dcok:1;
+               uint64_t ddr4_mode:1;
+               uint64_t pll_fbslip:1;
+               uint64_t pll_lock:1;
+               uint64_t pll_rfslip:1;
+               uint64_t clkr:2;
+               uint64_t jtg_test_mode:1;
+               uint64_t ddr_div_reset:1;
+               uint64_t ddr_ps_en:4;
+               uint64_t reserved_9_17:9;
+               uint64_t clkf_ext:1;
+               uint64_t reset_n:1;
+               uint64_t clkf:7;
+       } cn73xx;
+       struct cvmx_lmcx_ddr_pll_ctl_cn73xx cn78xx;
+       struct cvmx_lmcx_ddr_pll_ctl_cn73xx cn78xxp1;
+       struct cvmx_lmcx_ddr_pll_ctl_cn61xx cnf71xx;
+       struct cvmx_lmcx_ddr_pll_ctl_cn73xx cnf75xx;
+};
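The bringup steps listed in the comment above map directly onto this union and the csr_rd()/csr_wr() accessors from cvmx-regs.h. The sketch below is editorial, not part of the patch: it assumes the CN73XX field layout and <linux/delay.h> for udelay(), and the delay values are placeholders, since the real driver derives the waits from the reference clock:

	static void ddr_pll_bringup_sketch(int if_num, u64 clkf, u64 ps_en)
	{
		union cvmx_lmcx_ddr_pll_ctl pll;

		/* step 1: program CLKF, CLKR and DDR_PS_EN */
		pll.u64 = csr_rd(CVMX_LMCX_DDR_PLL_CTL(if_num));
		pll.cn73xx.clkf = clkf;
		pll.cn73xx.clkr = 0;
		pll.cn73xx.ddr_ps_en = ps_en;
		csr_wr(CVMX_LMCX_DDR_PLL_CTL(if_num), pll.u64);
		udelay(1);			/* step 2: 128 ref-clock cycles */

		pll.cn73xx.reset_n = 1;		/* step 3: release PLL reset */
		csr_wr(CVMX_LMCX_DDR_PLL_CTL(if_num), pll.u64);
		udelay(10);			/* step 4: 1152 ref clocks */

		pll.cn73xx.ddr_div_reset = 0;	/* step 5: release divider reset */
		csr_wr(CVMX_LMCX_DDR_PLL_CTL(if_num), pll.u64);
		udelay(1);			/* step 6: settle before using DDR */
	}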
+
+/**
+ * cvmx_lmc#_delay_cfg
+ *
+ * LMC_DELAY_CFG = Open-loop delay line settings
+ *
+ *
+ * Notes:
+ * The DQ bits add OUTGOING delay only to dq, dqs_[p,n], cb, cbs_[p,n], dqm.
+ * Delay is approximately 50-80ps per setting depending on process/voltage.
+ * There is no need to add incoming delay since by default all strobe bits
+ * are delayed internally by 90 degrees (as was always the case in previous
+ * passes and past chips).
+ *
+ * The CMD bits add delay to all command bits DDR_RAS, DDR_CAS, DDR_A<15:0>,
+ * DDR_BA<2:0>, DDR_n_CS<1:0>_L, DDR_WE, DDR_CKE and DDR_ODT_<7:0>.
+ * Again, delay is 50-80ps per tap.
+ *
+ * The CLK bits add delay to all clock signals DDR_CK_<5:0>_P and
+ * DDR_CK_<5:0>_N.  Again, delay is 50-80ps per tap.
+ *
+ * The usage scenario is the following: There is too much delay on command
+ * signals and setup on command is not met. The user can then delay the
+ * clock until setup is met.
+ *
+ * At the same time though, dq/dqs should be delayed because there is also
+ * a DDR spec tying dqs with clock. If the clock is delayed too much with
+ * respect to dqs, writes will start to fail.
+ *
+ * This scheme should eliminate the board need of adding routing delay to
+ * clock signals to make high frequencies work.
+ */
+union cvmx_lmcx_delay_cfg {
+       u64 u64;
+       struct cvmx_lmcx_delay_cfg_s {
+               uint64_t reserved_15_63:49;
+               uint64_t dq:5;
+               uint64_t cmd:5;
+               uint64_t clk:5;
+       } s;
+       struct cvmx_lmcx_delay_cfg_s cn30xx;
+       struct cvmx_lmcx_delay_cfg_cn38xx {
+               uint64_t reserved_14_63:50;
+               uint64_t dq:4;
+               uint64_t reserved_9_9:1;
+               uint64_t cmd:4;
+               uint64_t reserved_4_4:1;
+               uint64_t clk:4;
+       } cn38xx;
+       struct cvmx_lmcx_delay_cfg_cn38xx cn50xx;
+       struct cvmx_lmcx_delay_cfg_cn38xx cn52xx;
+       struct cvmx_lmcx_delay_cfg_cn38xx cn52xxp1;
+       struct cvmx_lmcx_delay_cfg_cn38xx cn56xx;
+       struct cvmx_lmcx_delay_cfg_cn38xx cn56xxp1;
+       struct cvmx_lmcx_delay_cfg_cn38xx cn58xx;
+       struct cvmx_lmcx_delay_cfg_cn38xx cn58xxp1;
+};
+
+/**
+ * cvmx_lmc#_dimm#_ddr4_params0
+ *
+ * This register contains values to be programmed into the extra DDR4 control
+ * words in the corresponding (registered) DIMM. These are control words
+ * RC1x through RC8x.
+ */
+union cvmx_lmcx_dimmx_ddr4_params0 {
+       u64 u64;
+       struct cvmx_lmcx_dimmx_ddr4_params0_s {
+               uint64_t rc8x:8;
+               uint64_t rc7x:8;
+               uint64_t rc6x:8;
+               uint64_t rc5x:8;
+               uint64_t rc4x:8;
+               uint64_t rc3x:8;
+               uint64_t rc2x:8;
+               uint64_t rc1x:8;
+       } s;
+       struct cvmx_lmcx_dimmx_ddr4_params0_s cn70xx;
+       struct cvmx_lmcx_dimmx_ddr4_params0_s cn70xxp1;
+       struct cvmx_lmcx_dimmx_ddr4_params0_s cn73xx;
+       struct cvmx_lmcx_dimmx_ddr4_params0_s cn78xx;
+       struct cvmx_lmcx_dimmx_ddr4_params0_s cn78xxp1;
+       struct cvmx_lmcx_dimmx_ddr4_params0_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_dimm#_ddr4_params1
+ *
+ * This register contains values to be programmed into the extra DDR4 control
+ * words in the corresponding (registered) DIMM. These are control words
+ * RC9x through RCBx.
+ */
+union cvmx_lmcx_dimmx_ddr4_params1 {
+       u64 u64;
+       struct cvmx_lmcx_dimmx_ddr4_params1_s {
+               uint64_t reserved_24_63:40;
+               uint64_t rcbx:8;
+               uint64_t rcax:8;
+               uint64_t rc9x:8;
+       } s;
+       struct cvmx_lmcx_dimmx_ddr4_params1_s cn70xx;
+       struct cvmx_lmcx_dimmx_ddr4_params1_s cn70xxp1;
+       struct cvmx_lmcx_dimmx_ddr4_params1_s cn73xx;
+       struct cvmx_lmcx_dimmx_ddr4_params1_s cn78xx;
+       struct cvmx_lmcx_dimmx_ddr4_params1_s cn78xxp1;
+       struct cvmx_lmcx_dimmx_ddr4_params1_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_dimm#_params
+ *
+ * This register contains values to be programmed into each control word in
+ * the corresponding (registered) DIMM. The control words allow optimization
+ * of the device properties for different raw card designs. Note that LMC
+ * only uses this CSR when LMC()_CONTROL[RDIMM_ENA]=1. During a power-up/init
+ * sequence, LMC writes these fields into the control words in the JEDEC
+ * standard DDR3 SSTE32882 registering clock driver or DDR4 Register
+ * DDR4RCD01 on an RDIMM when the corresponding LMC()_DIMM_CTL[DIMM*_WMASK]
+ * bits are set.
+ */
+union cvmx_lmcx_dimmx_params {
+       u64 u64;
+       struct cvmx_lmcx_dimmx_params_s {
+               uint64_t rc15:4;
+               uint64_t rc14:4;
+               uint64_t rc13:4;
+               uint64_t rc12:4;
+               uint64_t rc11:4;
+               uint64_t rc10:4;
+               uint64_t rc9:4;
+               uint64_t rc8:4;
+               uint64_t rc7:4;
+               uint64_t rc6:4;
+               uint64_t rc5:4;
+               uint64_t rc4:4;
+               uint64_t rc3:4;
+               uint64_t rc2:4;
+               uint64_t rc1:4;
+               uint64_t rc0:4;
+       } s;
+       struct cvmx_lmcx_dimmx_params_s cn61xx;
+       struct cvmx_lmcx_dimmx_params_s cn63xx;
+       struct cvmx_lmcx_dimmx_params_s cn63xxp1;
+       struct cvmx_lmcx_dimmx_params_s cn66xx;
+       struct cvmx_lmcx_dimmx_params_s cn68xx;
+       struct cvmx_lmcx_dimmx_params_s cn68xxp1;
+       struct cvmx_lmcx_dimmx_params_s cn70xx;
+       struct cvmx_lmcx_dimmx_params_s cn70xxp1;
+       struct cvmx_lmcx_dimmx_params_s cn73xx;
+       struct cvmx_lmcx_dimmx_params_s cn78xx;
+       struct cvmx_lmcx_dimmx_params_s cn78xxp1;
+       struct cvmx_lmcx_dimmx_params_s cnf71xx;
+       struct cvmx_lmcx_dimmx_params_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_dimm_ctl
+ *
+ * Note that this CSR is only used when LMC()_CONTROL[RDIMM_ENA] = 1. During
+ * a power-up/init sequence, this CSR controls LMC's write operations to the
+ * control words in the JEDEC standard DDR3 SSTE32882 registering clock
+ * driver or DDR4 Register DDR4RCD01 on an RDIMM.
+ */
+union cvmx_lmcx_dimm_ctl {
+       u64 u64;
+       struct cvmx_lmcx_dimm_ctl_s {
+               uint64_t reserved_46_63:18;
+               uint64_t parity:1;
+               uint64_t tcws:13;
+               uint64_t dimm1_wmask:16;
+               uint64_t dimm0_wmask:16;
+       } s;
+       struct cvmx_lmcx_dimm_ctl_s cn61xx;
+       struct cvmx_lmcx_dimm_ctl_s cn63xx;
+       struct cvmx_lmcx_dimm_ctl_s cn63xxp1;
+       struct cvmx_lmcx_dimm_ctl_s cn66xx;
+       struct cvmx_lmcx_dimm_ctl_s cn68xx;
+       struct cvmx_lmcx_dimm_ctl_s cn68xxp1;
+       struct cvmx_lmcx_dimm_ctl_s cn70xx;
+       struct cvmx_lmcx_dimm_ctl_s cn70xxp1;
+       struct cvmx_lmcx_dimm_ctl_s cn73xx;
+       struct cvmx_lmcx_dimm_ctl_s cn78xx;
+       struct cvmx_lmcx_dimm_ctl_s cn78xxp1;
+       struct cvmx_lmcx_dimm_ctl_s cnf71xx;
+       struct cvmx_lmcx_dimm_ctl_s cnf75xx;
+};
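+
+/*
+ * Hypothetical helper, illustration only: a DIMM_CTL value that lets LMC
+ * write control words to the registering clock driver on DIMM0 during the
+ * next init sequence. It assumes LMC()_CONTROL[RDIMM_ENA] is already set
+ * and that each DIMM0_WMASK bit gates one control word.
+ */
+static inline u64 cvmx_lmcx_dimm_ctl_wmask_dimm0(void)
+{
+       union cvmx_lmcx_dimm_ctl ctl;
+
+       ctl.u64 = 0;
+       ctl.s.dimm0_wmask = 0xffff;     /* enable writes of all control words */
+       return ctl.u64;
+}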
+
+/**
+ * cvmx_lmc#_dll_ctl
+ *
+ * LMC_DLL_CTL = LMC DLL control and DCLK reset
+ *
+ */
+union cvmx_lmcx_dll_ctl {
+       u64 u64;
+       struct cvmx_lmcx_dll_ctl_s {
+               uint64_t reserved_8_63:56;
+               uint64_t dreset:1;
+               uint64_t dll90_byp:1;
+               uint64_t dll90_ena:1;
+               uint64_t dll90_vlu:5;
+       } s;
+       struct cvmx_lmcx_dll_ctl_s cn52xx;
+       struct cvmx_lmcx_dll_ctl_s cn52xxp1;
+       struct cvmx_lmcx_dll_ctl_s cn56xx;
+       struct cvmx_lmcx_dll_ctl_s cn56xxp1;
+};
+
+/**
+ * cvmx_lmc#_dll_ctl2
+ *
+ * Refer to the LMC initialization sequence for details.
+ *
+ */
+union cvmx_lmcx_dll_ctl2 {
+       u64 u64;
+       struct cvmx_lmcx_dll_ctl2_s {
+               uint64_t reserved_0_63:64;
+       } s;
+       struct cvmx_lmcx_dll_ctl2_cn61xx {
+               uint64_t reserved_16_63:48;
+               uint64_t intf_en:1;
+               uint64_t dll_bringup:1;
+               uint64_t dreset:1;
+               uint64_t quad_dll_ena:1;
+               uint64_t byp_sel:4;
+               uint64_t byp_setting:8;
+       } cn61xx;
+       struct cvmx_lmcx_dll_ctl2_cn63xx {
+               uint64_t reserved_15_63:49;
+               uint64_t dll_bringup:1;
+               uint64_t dreset:1;
+               uint64_t quad_dll_ena:1;
+               uint64_t byp_sel:4;
+               uint64_t byp_setting:8;
+       } cn63xx;
+       struct cvmx_lmcx_dll_ctl2_cn63xx cn63xxp1;
+       struct cvmx_lmcx_dll_ctl2_cn63xx cn66xx;
+       struct cvmx_lmcx_dll_ctl2_cn61xx cn68xx;
+       struct cvmx_lmcx_dll_ctl2_cn61xx cn68xxp1;
+       struct cvmx_lmcx_dll_ctl2_cn70xx {
+               uint64_t reserved_17_63:47;
+               uint64_t intf_en:1;
+               uint64_t dll_bringup:1;
+               uint64_t dreset:1;
+               uint64_t quad_dll_ena:1;
+               uint64_t byp_sel:4;
+               uint64_t byp_setting:9;
+       } cn70xx;
+       struct cvmx_lmcx_dll_ctl2_cn70xx cn70xxp1;
+       struct cvmx_lmcx_dll_ctl2_cn70xx cn73xx;
+       struct cvmx_lmcx_dll_ctl2_cn70xx cn78xx;
+       struct cvmx_lmcx_dll_ctl2_cn70xx cn78xxp1;
+       struct cvmx_lmcx_dll_ctl2_cn61xx cnf71xx;
+       struct cvmx_lmcx_dll_ctl2_cn70xx cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_dll_ctl3
+ *
+ * LMC_DLL_CTL3 = LMC DLL control and DCLK reset
+ *
+ */
+union cvmx_lmcx_dll_ctl3 {
+       u64 u64;
+       struct cvmx_lmcx_dll_ctl3_s {
+               uint64_t reserved_50_63:14;
+               uint64_t wr_deskew_ena:1;
+               uint64_t wr_deskew_ld:1;
+               uint64_t bit_select:4;
+               uint64_t reserved_0_43:44;
+       } s;
+       struct cvmx_lmcx_dll_ctl3_cn61xx {
+               uint64_t reserved_41_63:23;
+               uint64_t dclk90_fwd:1;
+               uint64_t ddr_90_dly_byp:1;
+               uint64_t dclk90_recal_dis:1;
+               uint64_t dclk90_byp_sel:1;
+               uint64_t dclk90_byp_setting:8;
+               uint64_t dll_fast:1;
+               uint64_t dll90_setting:8;
+               uint64_t fine_tune_mode:1;
+               uint64_t dll_mode:1;
+               uint64_t dll90_byte_sel:4;
+               uint64_t offset_ena:1;
+               uint64_t load_offset:1;
+               uint64_t mode_sel:2;
+               uint64_t byte_sel:4;
+               uint64_t offset:6;
+       } cn61xx;
+       struct cvmx_lmcx_dll_ctl3_cn63xx {
+               uint64_t reserved_29_63:35;
+               uint64_t dll_fast:1;
+               uint64_t dll90_setting:8;
+               uint64_t fine_tune_mode:1;
+               uint64_t dll_mode:1;
+               uint64_t dll90_byte_sel:4;
+               uint64_t offset_ena:1;
+               uint64_t load_offset:1;
+               uint64_t mode_sel:2;
+               uint64_t byte_sel:4;
+               uint64_t offset:6;
+       } cn63xx;
+       struct cvmx_lmcx_dll_ctl3_cn63xx cn63xxp1;
+       struct cvmx_lmcx_dll_ctl3_cn63xx cn66xx;
+       struct cvmx_lmcx_dll_ctl3_cn61xx cn68xx;
+       struct cvmx_lmcx_dll_ctl3_cn61xx cn68xxp1;
+       struct cvmx_lmcx_dll_ctl3_cn70xx {
+               uint64_t reserved_44_63:20;
+               uint64_t dclk90_fwd:1;
+               uint64_t ddr_90_dly_byp:1;
+               uint64_t dclk90_recal_dis:1;
+               uint64_t dclk90_byp_sel:1;
+               uint64_t dclk90_byp_setting:9;
+               uint64_t dll_fast:1;
+               uint64_t dll90_setting:9;
+               uint64_t fine_tune_mode:1;
+               uint64_t dll_mode:1;
+               uint64_t dll90_byte_sel:4;
+               uint64_t offset_ena:1;
+               uint64_t load_offset:1;
+               uint64_t mode_sel:2;
+               uint64_t byte_sel:4;
+               uint64_t offset:7;
+       } cn70xx;
+       struct cvmx_lmcx_dll_ctl3_cn70xx cn70xxp1;
+       struct cvmx_lmcx_dll_ctl3_cn73xx {
+               uint64_t reserved_50_63:14;
+               uint64_t wr_deskew_ena:1;
+               uint64_t wr_deskew_ld:1;
+               uint64_t bit_select:4;
+               uint64_t dclk90_fwd:1;
+               uint64_t ddr_90_dly_byp:1;
+               uint64_t dclk90_recal_dis:1;
+               uint64_t dclk90_byp_sel:1;
+               uint64_t dclk90_byp_setting:9;
+               uint64_t dll_fast:1;
+               uint64_t dll90_setting:9;
+               uint64_t fine_tune_mode:1;
+               uint64_t dll_mode:1;
+               uint64_t dll90_byte_sel:4;
+               uint64_t offset_ena:1;
+               uint64_t load_offset:1;
+               uint64_t mode_sel:2;
+               uint64_t byte_sel:4;
+               uint64_t offset:7;
+       } cn73xx;
+       struct cvmx_lmcx_dll_ctl3_cn73xx cn78xx;
+       struct cvmx_lmcx_dll_ctl3_cn73xx cn78xxp1;
+       struct cvmx_lmcx_dll_ctl3_cn61xx cnf71xx;
+       struct cvmx_lmcx_dll_ctl3_cn73xx cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_dual_memcfg
+ *
+ * This register controls certain parameters of dual-memory configuration.
+ *
+ * This register enables the design to have two separate memory
+ * configurations, selected dynamically by the reference address. Note,
+ * however, that both configurations share LMC()_CONTROL[XOR_BANK],
+ * LMC()_CONFIG[PBANK_LSB], LMC()_CONFIG[RANK_ENA], and all timing parameters.
+ *
+ * In this description:
+ * * config0 refers to the normal memory configuration that is defined by the
+ * LMC()_CONFIG[ROW_LSB] parameter
+ * * config1 refers to the dual (or second) memory configuration that is
+ * defined by this register.
+ */
+union cvmx_lmcx_dual_memcfg {
+       u64 u64;
+       struct cvmx_lmcx_dual_memcfg_s {
+               uint64_t reserved_20_63:44;
+               uint64_t bank8:1;
+               uint64_t row_lsb:3;
+               uint64_t reserved_8_15:8;
+               uint64_t cs_mask:8;
+       } s;
+       struct cvmx_lmcx_dual_memcfg_s cn50xx;
+       struct cvmx_lmcx_dual_memcfg_s cn52xx;
+       struct cvmx_lmcx_dual_memcfg_s cn52xxp1;
+       struct cvmx_lmcx_dual_memcfg_s cn56xx;
+       struct cvmx_lmcx_dual_memcfg_s cn56xxp1;
+       struct cvmx_lmcx_dual_memcfg_s cn58xx;
+       struct cvmx_lmcx_dual_memcfg_s cn58xxp1;
+       struct cvmx_lmcx_dual_memcfg_cn61xx {
+               uint64_t reserved_19_63:45;
+               uint64_t row_lsb:3;
+               uint64_t reserved_8_15:8;
+               uint64_t cs_mask:8;
+       } cn61xx;
+       struct cvmx_lmcx_dual_memcfg_cn61xx cn63xx;
+       struct cvmx_lmcx_dual_memcfg_cn61xx cn63xxp1;
+       struct cvmx_lmcx_dual_memcfg_cn61xx cn66xx;
+       struct cvmx_lmcx_dual_memcfg_cn61xx cn68xx;
+       struct cvmx_lmcx_dual_memcfg_cn61xx cn68xxp1;
+       struct cvmx_lmcx_dual_memcfg_cn70xx {
+               uint64_t reserved_19_63:45;
+               uint64_t row_lsb:3;
+               uint64_t reserved_4_15:12;
+               uint64_t cs_mask:4;
+       } cn70xx;
+       struct cvmx_lmcx_dual_memcfg_cn70xx cn70xxp1;
+       struct cvmx_lmcx_dual_memcfg_cn70xx cn73xx;
+       struct cvmx_lmcx_dual_memcfg_cn70xx cn78xx;
+       struct cvmx_lmcx_dual_memcfg_cn70xx cn78xxp1;
+       struct cvmx_lmcx_dual_memcfg_cn61xx cnf71xx;
+       struct cvmx_lmcx_dual_memcfg_cn70xx cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_ecc_parity_test
+ *
+ * This register has bits to control the generation of ECC and command
+ * address parity errors. An ECC error is generated by enabling
+ * [ECC_CORRUPT_ENA] and selecting, via [ECC_CORRUPT_IDX], the index of the
+ * dataword within the cacheline to be corrupted.
+ * The user needs to select which bit of the 128-bit dataword to corrupt by
+ * asserting any of the CHAR_MASK0 and CHAR_MASK2 bits. (CHAR_MASK0 and
+ * CHAR_MASK2 correspond to the lower and upper 64-bit signals that can
+ * corrupt any individual bit of the data.)
+ *
+ * A command address parity error is generated by enabling
+ * [CA_PARITY_CORRUPT_ENA] and selecting, via [CA_PARITY_SEL], the DDR
+ * command whose parity is to be corrupted.
+ */
+union cvmx_lmcx_ecc_parity_test {
+       u64 u64;
+       struct cvmx_lmcx_ecc_parity_test_s {
+               uint64_t reserved_12_63:52;
+               uint64_t ecc_corrupt_ena:1;
+               uint64_t ecc_corrupt_idx:3;
+               uint64_t reserved_6_7:2;
+               uint64_t ca_parity_corrupt_ena:1;
+               uint64_t ca_parity_sel:5;
+       } s;
+       struct cvmx_lmcx_ecc_parity_test_s cn73xx;
+       struct cvmx_lmcx_ecc_parity_test_s cn78xx;
+       struct cvmx_lmcx_ecc_parity_test_s cn78xxp1;
+       struct cvmx_lmcx_ecc_parity_test_s cnf75xx;
+};
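+
+/*
+ * Hypothetical helper, illustration only: builds a value that injects a
+ * single ECC error into dataword [idx] (0-7) of a cacheline, as described
+ * above. Writing the value to the CSR and asserting the matching
+ * CHAR_MASK0/CHAR_MASK2 bit for the data bit to corrupt is left to the
+ * caller.
+ */
+static inline u64 cvmx_lmcx_ecc_parity_test_inject(unsigned int idx)
+{
+       union cvmx_lmcx_ecc_parity_test test;
+
+       test.u64 = 0;
+       test.s.ecc_corrupt_ena = 1;     /* enable ECC error injection */
+       test.s.ecc_corrupt_idx = idx;   /* dataword index within the cacheline */
+       return test.u64;
+}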
+
+/**
+ * cvmx_lmc#_ecc_synd
+ *
+ * LMC_ECC_SYND = MRD ECC Syndromes
+ *
+ */
+union cvmx_lmcx_ecc_synd {
+       u64 u64;
+       struct cvmx_lmcx_ecc_synd_s {
+               uint64_t reserved_32_63:32;
+               uint64_t mrdsyn3:8;
+               uint64_t mrdsyn2:8;
+               uint64_t mrdsyn1:8;
+               uint64_t mrdsyn0:8;
+       } s;
+       struct cvmx_lmcx_ecc_synd_s cn30xx;
+       struct cvmx_lmcx_ecc_synd_s cn31xx;
+       struct cvmx_lmcx_ecc_synd_s cn38xx;
+       struct cvmx_lmcx_ecc_synd_s cn38xxp2;
+       struct cvmx_lmcx_ecc_synd_s cn50xx;
+       struct cvmx_lmcx_ecc_synd_s cn52xx;
+       struct cvmx_lmcx_ecc_synd_s cn52xxp1;
+       struct cvmx_lmcx_ecc_synd_s cn56xx;
+       struct cvmx_lmcx_ecc_synd_s cn56xxp1;
+       struct cvmx_lmcx_ecc_synd_s cn58xx;
+       struct cvmx_lmcx_ecc_synd_s cn58xxp1;
+       struct cvmx_lmcx_ecc_synd_s cn61xx;
+       struct cvmx_lmcx_ecc_synd_s cn63xx;
+       struct cvmx_lmcx_ecc_synd_s cn63xxp1;
+       struct cvmx_lmcx_ecc_synd_s cn66xx;
+       struct cvmx_lmcx_ecc_synd_s cn68xx;
+       struct cvmx_lmcx_ecc_synd_s cn68xxp1;
+       struct cvmx_lmcx_ecc_synd_s cn70xx;
+       struct cvmx_lmcx_ecc_synd_s cn70xxp1;
+       struct cvmx_lmcx_ecc_synd_s cn73xx;
+       struct cvmx_lmcx_ecc_synd_s cn78xx;
+       struct cvmx_lmcx_ecc_synd_s cn78xxp1;
+       struct cvmx_lmcx_ecc_synd_s cnf71xx;
+       struct cvmx_lmcx_ecc_synd_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_ext_config
+ *
+ * This register has additional configuration and control bits for the LMC.
+ *
+ */
+union cvmx_lmcx_ext_config {
+       u64 u64;
+       struct cvmx_lmcx_ext_config_s {
+               uint64_t reserved_61_63:3;
+               uint64_t bc4_dqs_ena:1;
+               uint64_t ref_block:1;
+               uint64_t mrs_side:1;
+               uint64_t mrs_one_side:1;
+               uint64_t mrs_bside_invert_disable:1;
+               uint64_t dimm_sel_invert_off:1;
+               uint64_t dimm_sel_force_invert:1;
+               uint64_t coalesce_address_mode:1;
+               uint64_t dimm1_cid:2;
+               uint64_t dimm0_cid:2;
+               uint64_t rcd_parity_check:1;
+               uint64_t reserved_46_47:2;
+               uint64_t error_alert_n_sample:1;
+               uint64_t ea_int_polarity:1;
+               uint64_t reserved_43_43:1;
+               uint64_t par_addr_mask:3;
+               uint64_t reserved_38_39:2;
+               uint64_t mrs_cmd_override:1;
+               uint64_t mrs_cmd_select:1;
+               uint64_t reserved_33_35:3;
+               uint64_t invert_data:1;
+               uint64_t reserved_30_31:2;
+               uint64_t cmd_rti:1;
+               uint64_t cal_ena:1;
+               uint64_t reserved_27_27:1;
+               uint64_t par_include_a17:1;
+               uint64_t par_include_bg1:1;
+               uint64_t gen_par:1;
+               uint64_t reserved_21_23:3;
+               uint64_t vrefint_seq_deskew:1;
+               uint64_t read_ena_bprch:1;
+               uint64_t read_ena_fprch:1;
+               uint64_t slot_ctl_reset_force:1;
+               uint64_t ref_int_lsbs:9;
+               uint64_t drive_ena_bprch:1;
+               uint64_t drive_ena_fprch:1;
+               uint64_t dlcram_flip_synd:2;
+               uint64_t dlcram_cor_dis:1;
+               uint64_t dlc_nxm_rd:1;
+               uint64_t l2c_nxm_rd:1;
+               uint64_t l2c_nxm_wr:1;
+       } s;
+       struct cvmx_lmcx_ext_config_cn70xx {
+               uint64_t reserved_21_63:43;
+               uint64_t vrefint_seq_deskew:1;
+               uint64_t read_ena_bprch:1;
+               uint64_t read_ena_fprch:1;
+               uint64_t slot_ctl_reset_force:1;
+               uint64_t ref_int_lsbs:9;
+               uint64_t drive_ena_bprch:1;
+               uint64_t drive_ena_fprch:1;
+               uint64_t dlcram_flip_synd:2;
+               uint64_t dlcram_cor_dis:1;
+               uint64_t dlc_nxm_rd:1;
+               uint64_t l2c_nxm_rd:1;
+               uint64_t l2c_nxm_wr:1;
+       } cn70xx;
+       struct cvmx_lmcx_ext_config_cn70xx cn70xxp1;
+       struct cvmx_lmcx_ext_config_cn73xx {
+               uint64_t reserved_60_63:4;
+               uint64_t ref_block:1;
+               uint64_t mrs_side:1;
+               uint64_t mrs_one_side:1;
+               uint64_t mrs_bside_invert_disable:1;
+               uint64_t dimm_sel_invert_off:1;
+               uint64_t dimm_sel_force_invert:1;
+               uint64_t coalesce_address_mode:1;
+               uint64_t dimm1_cid:2;
+               uint64_t dimm0_cid:2;
+               uint64_t rcd_parity_check:1;
+               uint64_t reserved_46_47:2;
+               uint64_t error_alert_n_sample:1;
+               uint64_t ea_int_polarity:1;
+               uint64_t reserved_43_43:1;
+               uint64_t par_addr_mask:3;
+               uint64_t reserved_38_39:2;
+               uint64_t mrs_cmd_override:1;
+               uint64_t mrs_cmd_select:1;
+               uint64_t reserved_33_35:3;
+               uint64_t invert_data:1;
+               uint64_t reserved_30_31:2;
+               uint64_t cmd_rti:1;
+               uint64_t cal_ena:1;
+               uint64_t reserved_27_27:1;
+               uint64_t par_include_a17:1;
+               uint64_t par_include_bg1:1;
+               uint64_t gen_par:1;
+               uint64_t reserved_21_23:3;
+               uint64_t vrefint_seq_deskew:1;
+               uint64_t read_ena_bprch:1;
+               uint64_t read_ena_fprch:1;
+               uint64_t slot_ctl_reset_force:1;
+               uint64_t ref_int_lsbs:9;
+               uint64_t drive_ena_bprch:1;
+               uint64_t drive_ena_fprch:1;
+               uint64_t dlcram_flip_synd:2;
+               uint64_t dlcram_cor_dis:1;
+               uint64_t dlc_nxm_rd:1;
+               uint64_t l2c_nxm_rd:1;
+               uint64_t l2c_nxm_wr:1;
+       } cn73xx;
+       struct cvmx_lmcx_ext_config_s cn78xx;
+       struct cvmx_lmcx_ext_config_s cn78xxp1;
+       struct cvmx_lmcx_ext_config_cn73xx cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_ext_config2
+ *
+ * This register has additional configuration and control bits for the LMC.
+ *
+ */
+union cvmx_lmcx_ext_config2 {
+       u64 u64;
+       struct cvmx_lmcx_ext_config2_s {
+               uint64_t reserved_27_63:37;
+               uint64_t sref_auto_idle_thres:5;
+               uint64_t sref_auto_enable:1;
+               uint64_t delay_unload_r3:1;
+               uint64_t delay_unload_r2:1;
+               uint64_t delay_unload_r1:1;
+               uint64_t delay_unload_r0:1;
+               uint64_t early_dqx2:1;
+               uint64_t xor_bank_sel:4;
+               uint64_t reserved_10_11:2;
+               uint64_t row_col_switch:1;
+               uint64_t trr_on:1;
+               uint64_t mac:3;
+               uint64_t macram_scrub_done:1;
+               uint64_t macram_scrub:1;
+               uint64_t macram_flip_synd:2;
+               uint64_t macram_cor_dis:1;
+       } s;
+       struct cvmx_lmcx_ext_config2_cn73xx {
+               uint64_t reserved_10_63:54;
+               uint64_t row_col_switch:1;
+               uint64_t trr_on:1;
+               uint64_t mac:3;
+               uint64_t macram_scrub_done:1;
+               uint64_t macram_scrub:1;
+               uint64_t macram_flip_synd:2;
+               uint64_t macram_cor_dis:1;
+       } cn73xx;
+       struct cvmx_lmcx_ext_config2_s cn78xx;
+       struct cvmx_lmcx_ext_config2_cnf75xx {
+               uint64_t reserved_21_63:43;
+               uint64_t delay_unload_r3:1;
+               uint64_t delay_unload_r2:1;
+               uint64_t delay_unload_r1:1;
+               uint64_t delay_unload_r0:1;
+               uint64_t early_dqx2:1;
+               uint64_t xor_bank_sel:4;
+               uint64_t reserved_10_11:2;
+               uint64_t row_col_switch:1;
+               uint64_t trr_on:1;
+               uint64_t mac:3;
+               uint64_t macram_scrub_done:1;
+               uint64_t macram_scrub:1;
+               uint64_t macram_flip_synd:2;
+               uint64_t macram_cor_dis:1;
+       } cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_fadr
+ *
+ * This register only captures the first transaction with an ECC error. A DED
+ * error can overwrite this register with its failing address if the first
+ * error was a SEC. Writing LMC()_INT -> SEC_ERR/DED_ERR clears the error
+ * bits and captures the next failing address. If FDIMM is 1, the error is
+ * in the high DIMM. LMC()_FADR captures the failing pre-scrambled address
+ * location (split into DIMM, bunk, bank, etc.).
+ * If scrambling is off, then LMC()_FADR also captures the failing physical
+ * location in the DRAM parts. LMC()_SCRAMBLED_FADR captures the actual
+ * failing address location in the physical DRAM parts, i.e.:
+ * if scrambling is on, LMC()_SCRAMBLED_FADR contains the failing physical
+ * location in the DRAM parts (split into DIMM, bunk, bank, etc.);
+ * if scrambling is off, the pre-scramble and post-scramble addresses are
+ * the same, and so the contents of LMC()_SCRAMBLED_FADR match the contents
+ * of LMC()_FADR.
+ */
+union cvmx_lmcx_fadr {
+       u64 u64;
+       struct cvmx_lmcx_fadr_s {
+               uint64_t reserved_43_63:21;
+               uint64_t fcid:3;
+               uint64_t fill_order:2;
+               uint64_t reserved_0_37:38;
+       } s;
+       struct cvmx_lmcx_fadr_cn30xx {
+               uint64_t reserved_32_63:32;
+               uint64_t fdimm:2;
+               uint64_t fbunk:1;
+               uint64_t fbank:3;
+               uint64_t frow:14;
+               uint64_t fcol:12;
+       } cn30xx;
+       struct cvmx_lmcx_fadr_cn30xx cn31xx;
+       struct cvmx_lmcx_fadr_cn30xx cn38xx;
+       struct cvmx_lmcx_fadr_cn30xx cn38xxp2;
+       struct cvmx_lmcx_fadr_cn30xx cn50xx;
+       struct cvmx_lmcx_fadr_cn30xx cn52xx;
+       struct cvmx_lmcx_fadr_cn30xx cn52xxp1;
+       struct cvmx_lmcx_fadr_cn30xx cn56xx;
+       struct cvmx_lmcx_fadr_cn30xx cn56xxp1;
+       struct cvmx_lmcx_fadr_cn30xx cn58xx;
+       struct cvmx_lmcx_fadr_cn30xx cn58xxp1;
+       struct cvmx_lmcx_fadr_cn61xx {
+               uint64_t reserved_36_63:28;
+               uint64_t fdimm:2;
+               uint64_t fbunk:1;
+               uint64_t fbank:3;
+               uint64_t frow:16;
+               uint64_t fcol:14;
+       } cn61xx;
+       struct cvmx_lmcx_fadr_cn61xx cn63xx;
+       struct cvmx_lmcx_fadr_cn61xx cn63xxp1;
+       struct cvmx_lmcx_fadr_cn61xx cn66xx;
+       struct cvmx_lmcx_fadr_cn61xx cn68xx;
+       struct cvmx_lmcx_fadr_cn61xx cn68xxp1;
+       struct cvmx_lmcx_fadr_cn70xx {
+               uint64_t reserved_40_63:24;
+               uint64_t fill_order:2;
+               uint64_t fdimm:1;
+               uint64_t fbunk:1;
+               uint64_t fbank:4;
+               uint64_t frow:18;
+               uint64_t fcol:14;
+       } cn70xx;
+       struct cvmx_lmcx_fadr_cn70xx cn70xxp1;
+       struct cvmx_lmcx_fadr_cn73xx {
+               uint64_t reserved_43_63:21;
+               uint64_t fcid:3;
+               uint64_t fill_order:2;
+               uint64_t fdimm:1;
+               uint64_t fbunk:1;
+               uint64_t fbank:4;
+               uint64_t frow:18;
+               uint64_t fcol:14;
+       } cn73xx;
+       struct cvmx_lmcx_fadr_cn73xx cn78xx;
+       struct cvmx_lmcx_fadr_cn73xx cn78xxp1;
+       struct cvmx_lmcx_fadr_cn61xx cnf71xx;
+       struct cvmx_lmcx_fadr_cn73xx cnf75xx;
+};
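+
+/*
+ * Hypothetical helper, illustration only: pulls the failing DIMM, rank
+ * (bunk), bank, row and column out of a captured CN73xx-format failing
+ * address, e.g. after a SEC_ERR/DED_ERR interrupt. Reading the CSR and
+ * reporting the result are left to the caller.
+ */
+static inline void cvmx_lmcx_fadr_decode_cn73xx(u64 csr, int *dimm, int *bunk,
+                                                int *bank, int *row, int *col)
+{
+       union cvmx_lmcx_fadr fadr;
+
+       fadr.u64 = csr;
+       *dimm = fadr.cn73xx.fdimm;
+       *bunk = fadr.cn73xx.fbunk;      /* rank within the DIMM */
+       *bank = fadr.cn73xx.fbank;
+       *row = fadr.cn73xx.frow;
+       *col = fadr.cn73xx.fcol;
+}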
+
+/**
+ * cvmx_lmc#_general_purpose0
+ */
+union cvmx_lmcx_general_purpose0 {
+       u64 u64;
+       struct cvmx_lmcx_general_purpose0_s {
+               uint64_t data:64;
+       } s;
+       struct cvmx_lmcx_general_purpose0_s cn73xx;
+       struct cvmx_lmcx_general_purpose0_s cn78xx;
+       struct cvmx_lmcx_general_purpose0_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_general_purpose1
+ */
+union cvmx_lmcx_general_purpose1 {
+       u64 u64;
+       struct cvmx_lmcx_general_purpose1_s {
+               uint64_t data:64;
+       } s;
+       struct cvmx_lmcx_general_purpose1_s cn73xx;
+       struct cvmx_lmcx_general_purpose1_s cn78xx;
+       struct cvmx_lmcx_general_purpose1_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_general_purpose2
+ */
+union cvmx_lmcx_general_purpose2 {
+       u64 u64;
+       struct cvmx_lmcx_general_purpose2_s {
+               uint64_t reserved_16_63:48;
+               uint64_t data:16;
+       } s;
+       struct cvmx_lmcx_general_purpose2_s cn73xx;
+       struct cvmx_lmcx_general_purpose2_s cn78xx;
+       struct cvmx_lmcx_general_purpose2_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_ifb_cnt
+ *
+ * LMC_IFB_CNT  = Performance Counters
+ *
+ */
+union cvmx_lmcx_ifb_cnt {
+       u64 u64;
+       struct cvmx_lmcx_ifb_cnt_s {
+               uint64_t ifbcnt:64;
+       } s;
+       struct cvmx_lmcx_ifb_cnt_s cn61xx;
+       struct cvmx_lmcx_ifb_cnt_s cn63xx;
+       struct cvmx_lmcx_ifb_cnt_s cn63xxp1;
+       struct cvmx_lmcx_ifb_cnt_s cn66xx;
+       struct cvmx_lmcx_ifb_cnt_s cn68xx;
+       struct cvmx_lmcx_ifb_cnt_s cn68xxp1;
+       struct cvmx_lmcx_ifb_cnt_s cn70xx;
+       struct cvmx_lmcx_ifb_cnt_s cn70xxp1;
+       struct cvmx_lmcx_ifb_cnt_s cn73xx;
+       struct cvmx_lmcx_ifb_cnt_s cn78xx;
+       struct cvmx_lmcx_ifb_cnt_s cn78xxp1;
+       struct cvmx_lmcx_ifb_cnt_s cnf71xx;
+       struct cvmx_lmcx_ifb_cnt_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_ifb_cnt_hi
+ *
+ * LMC_IFB_CNT_HI  = Performance Counters
+ *
+ */
+union cvmx_lmcx_ifb_cnt_hi {
+       u64 u64;
+       struct cvmx_lmcx_ifb_cnt_hi_s {
+               uint64_t reserved_32_63:32;
+               uint64_t ifbcnt_hi:32;
+       } s;
+       struct cvmx_lmcx_ifb_cnt_hi_s cn30xx;
+       struct cvmx_lmcx_ifb_cnt_hi_s cn31xx;
+       struct cvmx_lmcx_ifb_cnt_hi_s cn38xx;
+       struct cvmx_lmcx_ifb_cnt_hi_s cn38xxp2;
+       struct cvmx_lmcx_ifb_cnt_hi_s cn50xx;
+       struct cvmx_lmcx_ifb_cnt_hi_s cn52xx;
+       struct cvmx_lmcx_ifb_cnt_hi_s cn52xxp1;
+       struct cvmx_lmcx_ifb_cnt_hi_s cn56xx;
+       struct cvmx_lmcx_ifb_cnt_hi_s cn56xxp1;
+       struct cvmx_lmcx_ifb_cnt_hi_s cn58xx;
+       struct cvmx_lmcx_ifb_cnt_hi_s cn58xxp1;
+};
+
+/**
+ * cvmx_lmc#_ifb_cnt_lo
+ *
+ * LMC_IFB_CNT_LO  = Performance Counters
+ *
+ */
+union cvmx_lmcx_ifb_cnt_lo {
+       u64 u64;
+       struct cvmx_lmcx_ifb_cnt_lo_s {
+               uint64_t reserved_32_63:32;
+               uint64_t ifbcnt_lo:32;
+       } s;
+       struct cvmx_lmcx_ifb_cnt_lo_s cn30xx;
+       struct cvmx_lmcx_ifb_cnt_lo_s cn31xx;
+       struct cvmx_lmcx_ifb_cnt_lo_s cn38xx;
+       struct cvmx_lmcx_ifb_cnt_lo_s cn38xxp2;
+       struct cvmx_lmcx_ifb_cnt_lo_s cn50xx;
+       struct cvmx_lmcx_ifb_cnt_lo_s cn52xx;
+       struct cvmx_lmcx_ifb_cnt_lo_s cn52xxp1;
+       struct cvmx_lmcx_ifb_cnt_lo_s cn56xx;
+       struct cvmx_lmcx_ifb_cnt_lo_s cn56xxp1;
+       struct cvmx_lmcx_ifb_cnt_lo_s cn58xx;
+       struct cvmx_lmcx_ifb_cnt_lo_s cn58xxp1;
+};
+
+/**
+ * cvmx_lmc#_int
+ *
+ * This register contains the different interrupt-summary bits of the LMC.
+ *
+ */
+union cvmx_lmcx_int {
+       u64 u64;
+       struct cvmx_lmcx_int_s {
+               uint64_t reserved_14_63:50;
+               uint64_t macram_ded_err:1;
+               uint64_t macram_sec_err:1;
+               uint64_t ddr_err:1;
+               uint64_t dlcram_ded_err:1;
+               uint64_t dlcram_sec_err:1;
+               uint64_t ded_err:4;
+               uint64_t sec_err:4;
+               uint64_t nxm_wr_err:1;
+       } s;
+       struct cvmx_lmcx_int_cn61xx {
+               uint64_t reserved_9_63:55;
+               uint64_t ded_err:4;
+               uint64_t sec_err:4;
+               uint64_t nxm_wr_err:1;
+       } cn61xx;
+       struct cvmx_lmcx_int_cn61xx cn63xx;
+       struct cvmx_lmcx_int_cn61xx cn63xxp1;
+       struct cvmx_lmcx_int_cn61xx cn66xx;
+       struct cvmx_lmcx_int_cn61xx cn68xx;
+       struct cvmx_lmcx_int_cn61xx cn68xxp1;
+       struct cvmx_lmcx_int_cn70xx {
+               uint64_t reserved_12_63:52;
+               uint64_t ddr_err:1;
+               uint64_t dlcram_ded_err:1;
+               uint64_t dlcram_sec_err:1;
+               uint64_t ded_err:4;
+               uint64_t sec_err:4;
+               uint64_t nxm_wr_err:1;
+       } cn70xx;
+       struct cvmx_lmcx_int_cn70xx cn70xxp1;
+       struct cvmx_lmcx_int_s cn73xx;
+       struct cvmx_lmcx_int_s cn78xx;
+       struct cvmx_lmcx_int_s cn78xxp1;
+       struct cvmx_lmcx_int_cn61xx cnf71xx;
+       struct cvmx_lmcx_int_s cnf75xx;
+};
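+
+/*
+ * Hypothetical helper, illustration only: builds a write value that
+ * acknowledges all SEC/DED ECC interrupt bits. The LMC()_FADR description
+ * above implies these bits are cleared by writing them back as ones.
+ */
+static inline u64 cvmx_lmcx_int_clear_ecc(void)
+{
+       union cvmx_lmcx_int intr;
+
+       intr.u64 = 0;
+       intr.s.sec_err = 0xf;   /* all four single-bit-error summary bits */
+       intr.s.ded_err = 0xf;   /* all four double-bit-error summary bits */
+       return intr.u64;
+}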
+
+/**
+ * cvmx_lmc#_int_en
+ *
+ * Unused CSR in O75.
+ *
+ */
+union cvmx_lmcx_int_en {
+       u64 u64;
+       struct cvmx_lmcx_int_en_s {
+               uint64_t reserved_6_63:58;
+               uint64_t ddr_error_alert_ena:1;
+               uint64_t dlcram_ded_ena:1;
+               uint64_t dlcram_sec_ena:1;
+               uint64_t intr_ded_ena:1;
+               uint64_t intr_sec_ena:1;
+               uint64_t intr_nxm_wr_ena:1;
+       } s;
+       struct cvmx_lmcx_int_en_cn61xx {
+               uint64_t reserved_3_63:61;
+               uint64_t intr_ded_ena:1;
+               uint64_t intr_sec_ena:1;
+               uint64_t intr_nxm_wr_ena:1;
+       } cn61xx;
+       struct cvmx_lmcx_int_en_cn61xx cn63xx;
+       struct cvmx_lmcx_int_en_cn61xx cn63xxp1;
+       struct cvmx_lmcx_int_en_cn61xx cn66xx;
+       struct cvmx_lmcx_int_en_cn61xx cn68xx;
+       struct cvmx_lmcx_int_en_cn61xx cn68xxp1;
+       struct cvmx_lmcx_int_en_s cn70xx;
+       struct cvmx_lmcx_int_en_s cn70xxp1;
+       struct cvmx_lmcx_int_en_s cn73xx;
+       struct cvmx_lmcx_int_en_s cn78xx;
+       struct cvmx_lmcx_int_en_s cn78xxp1;
+       struct cvmx_lmcx_int_en_cn61xx cnf71xx;
+       struct cvmx_lmcx_int_en_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_lane#_crc_swiz
+ *
+ * This register contains the CRC bit swizzle for even and odd ranks.
+ *
+ */
+union cvmx_lmcx_lanex_crc_swiz {
+       u64 u64;
+       struct cvmx_lmcx_lanex_crc_swiz_s {
+               uint64_t reserved_56_63:8;
+               uint64_t r1_swiz7:3;
+               uint64_t r1_swiz6:3;
+               uint64_t r1_swiz5:3;
+               uint64_t r1_swiz4:3;
+               uint64_t r1_swiz3:3;
+               uint64_t r1_swiz2:3;
+               uint64_t r1_swiz1:3;
+               uint64_t r1_swiz0:3;
+               uint64_t reserved_24_31:8;
+               uint64_t r0_swiz7:3;
+               uint64_t r0_swiz6:3;
+               uint64_t r0_swiz5:3;
+               uint64_t r0_swiz4:3;
+               uint64_t r0_swiz3:3;
+               uint64_t r0_swiz2:3;
+               uint64_t r0_swiz1:3;
+               uint64_t r0_swiz0:3;
+       } s;
+       struct cvmx_lmcx_lanex_crc_swiz_s cn73xx;
+       struct cvmx_lmcx_lanex_crc_swiz_s cn78xx;
+       struct cvmx_lmcx_lanex_crc_swiz_s cn78xxp1;
+       struct cvmx_lmcx_lanex_crc_swiz_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_mem_cfg0
+ *
+ * Specify the RSL base addresses for the block
+ *
+ *                  LMC_MEM_CFG0 = LMC Memory Configuration Register0
+ *
+ * This register controls certain parameters of the memory configuration.
+ */
+union cvmx_lmcx_mem_cfg0 {
+       u64 u64;
+       struct cvmx_lmcx_mem_cfg0_s {
+               uint64_t reserved_32_63:32;
+               uint64_t reset:1;
+               uint64_t silo_qc:1;
+               uint64_t bunk_ena:1;
+               uint64_t ded_err:4;
+               uint64_t sec_err:4;
+               uint64_t intr_ded_ena:1;
+               uint64_t intr_sec_ena:1;
+               uint64_t tcl:4;
+               uint64_t ref_int:6;
+               uint64_t pbank_lsb:4;
+               uint64_t row_lsb:3;
+               uint64_t ecc_ena:1;
+               uint64_t init_start:1;
+       } s;
+       struct cvmx_lmcx_mem_cfg0_s cn30xx;
+       struct cvmx_lmcx_mem_cfg0_s cn31xx;
+       struct cvmx_lmcx_mem_cfg0_s cn38xx;
+       struct cvmx_lmcx_mem_cfg0_s cn38xxp2;
+       struct cvmx_lmcx_mem_cfg0_s cn50xx;
+       struct cvmx_lmcx_mem_cfg0_s cn52xx;
+       struct cvmx_lmcx_mem_cfg0_s cn52xxp1;
+       struct cvmx_lmcx_mem_cfg0_s cn56xx;
+       struct cvmx_lmcx_mem_cfg0_s cn56xxp1;
+       struct cvmx_lmcx_mem_cfg0_s cn58xx;
+       struct cvmx_lmcx_mem_cfg0_s cn58xxp1;
+};
+
+/**
+ * cvmx_lmc#_mem_cfg1
+ *
+ * LMC_MEM_CFG1 = LMC Memory Configuration Register1
+ *
+ * This register controls the External Memory Configuration Timing Parameters.
+ * Please refer to the appropriate DDR part spec from your memory vendor for
+ * the various values in this CSR. The details of each of these timing
+ * parameters can be found in the JEDEC spec or the vendor spec of the
+ * memory parts.
+ */
+union cvmx_lmcx_mem_cfg1 {
+       u64 u64;
+       struct cvmx_lmcx_mem_cfg1_s {
+               uint64_t reserved_32_63:32;
+               uint64_t comp_bypass:1;
+               uint64_t trrd:3;
+               uint64_t caslat:3;
+               uint64_t tmrd:3;
+               uint64_t trfc:5;
+               uint64_t trp:4;
+               uint64_t twtr:4;
+               uint64_t trcd:4;
+               uint64_t tras:5;
+       } s;
+       struct cvmx_lmcx_mem_cfg1_s cn30xx;
+       struct cvmx_lmcx_mem_cfg1_s cn31xx;
+       struct cvmx_lmcx_mem_cfg1_cn38xx {
+               uint64_t reserved_31_63:33;
+               uint64_t trrd:3;
+               uint64_t caslat:3;
+               uint64_t tmrd:3;
+               uint64_t trfc:5;
+               uint64_t trp:4;
+               uint64_t twtr:4;
+               uint64_t trcd:4;
+               uint64_t tras:5;
+       } cn38xx;
+       struct cvmx_lmcx_mem_cfg1_cn38xx cn38xxp2;
+       struct cvmx_lmcx_mem_cfg1_s cn50xx;
+       struct cvmx_lmcx_mem_cfg1_cn38xx cn52xx;
+       struct cvmx_lmcx_mem_cfg1_cn38xx cn52xxp1;
+       struct cvmx_lmcx_mem_cfg1_cn38xx cn56xx;
+       struct cvmx_lmcx_mem_cfg1_cn38xx cn56xxp1;
+       struct cvmx_lmcx_mem_cfg1_cn38xx cn58xx;
+       struct cvmx_lmcx_mem_cfg1_cn38xx cn58xxp1;
+};
+
+/**
+ * cvmx_lmc#_modereg_params0
+ *
+ * These parameters are written into the DDR3/DDR4 MR0, MR1, MR2 and MR3
+ * registers.
+ *
+ */
+union cvmx_lmcx_modereg_params0 {
+       u64 u64;
+       struct cvmx_lmcx_modereg_params0_s {
+               uint64_t reserved_28_63:36;
+               uint64_t wrp_ext:1;
+               uint64_t cl_ext:1;
+               uint64_t al_ext:1;
+               uint64_t ppd:1;
+               uint64_t wrp:3;
+               uint64_t dllr:1;
+               uint64_t tm:1;
+               uint64_t rbt:1;
+               uint64_t cl:4;
+               uint64_t bl:2;
+               uint64_t qoff:1;
+               uint64_t tdqs:1;
+               uint64_t wlev:1;
+               uint64_t al:2;
+               uint64_t dll:1;
+               uint64_t mpr:1;
+               uint64_t mprloc:2;
+               uint64_t cwl:3;
+       } s;
+       struct cvmx_lmcx_modereg_params0_cn61xx {
+               uint64_t reserved_25_63:39;
+               uint64_t ppd:1;
+               uint64_t wrp:3;
+               uint64_t dllr:1;
+               uint64_t tm:1;
+               uint64_t rbt:1;
+               uint64_t cl:4;
+               uint64_t bl:2;
+               uint64_t qoff:1;
+               uint64_t tdqs:1;
+               uint64_t wlev:1;
+               uint64_t al:2;
+               uint64_t dll:1;
+               uint64_t mpr:1;
+               uint64_t mprloc:2;
+               uint64_t cwl:3;
+       } cn61xx;
+       struct cvmx_lmcx_modereg_params0_cn61xx cn63xx;
+       struct cvmx_lmcx_modereg_params0_cn61xx cn63xxp1;
+       struct cvmx_lmcx_modereg_params0_cn61xx cn66xx;
+       struct cvmx_lmcx_modereg_params0_cn61xx cn68xx;
+       struct cvmx_lmcx_modereg_params0_cn61xx cn68xxp1;
+       struct cvmx_lmcx_modereg_params0_cn61xx cn70xx;
+       struct cvmx_lmcx_modereg_params0_cn61xx cn70xxp1;
+       struct cvmx_lmcx_modereg_params0_s cn73xx;
+       struct cvmx_lmcx_modereg_params0_s cn78xx;
+       struct cvmx_lmcx_modereg_params0_s cn78xxp1;
+       struct cvmx_lmcx_modereg_params0_cn61xx cnf71xx;
+       struct cvmx_lmcx_modereg_params0_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_modereg_params1
+ *
+ * These parameters are written into the DDR3 MR0, MR1, MR2 and MR3 registers.
+ *
+ */
+union cvmx_lmcx_modereg_params1 {
+       u64 u64;
+       struct cvmx_lmcx_modereg_params1_s {
+               uint64_t reserved_55_63:9;
+               uint64_t rtt_wr_11_ext:1;
+               uint64_t rtt_wr_10_ext:1;
+               uint64_t rtt_wr_01_ext:1;
+               uint64_t rtt_wr_00_ext:1;
+               uint64_t db_output_impedance:3;
+               uint64_t rtt_nom_11:3;
+               uint64_t dic_11:2;
+               uint64_t rtt_wr_11:2;
+               uint64_t srt_11:1;
+               uint64_t asr_11:1;
+               uint64_t pasr_11:3;
+               uint64_t rtt_nom_10:3;
+               uint64_t dic_10:2;
+               uint64_t rtt_wr_10:2;
+               uint64_t srt_10:1;
+               uint64_t asr_10:1;
+               uint64_t pasr_10:3;
+               uint64_t rtt_nom_01:3;
+               uint64_t dic_01:2;
+               uint64_t rtt_wr_01:2;
+               uint64_t srt_01:1;
+               uint64_t asr_01:1;
+               uint64_t pasr_01:3;
+               uint64_t rtt_nom_00:3;
+               uint64_t dic_00:2;
+               uint64_t rtt_wr_00:2;
+               uint64_t srt_00:1;
+               uint64_t asr_00:1;
+               uint64_t pasr_00:3;
+       } s;
+       struct cvmx_lmcx_modereg_params1_cn61xx {
+               uint64_t reserved_48_63:16;
+               uint64_t rtt_nom_11:3;
+               uint64_t dic_11:2;
+               uint64_t rtt_wr_11:2;
+               uint64_t srt_11:1;
+               uint64_t asr_11:1;
+               uint64_t pasr_11:3;
+               uint64_t rtt_nom_10:3;
+               uint64_t dic_10:2;
+               uint64_t rtt_wr_10:2;
+               uint64_t srt_10:1;
+               uint64_t asr_10:1;
+               uint64_t pasr_10:3;
+               uint64_t rtt_nom_01:3;
+               uint64_t dic_01:2;
+               uint64_t rtt_wr_01:2;
+               uint64_t srt_01:1;
+               uint64_t asr_01:1;
+               uint64_t pasr_01:3;
+               uint64_t rtt_nom_00:3;
+               uint64_t dic_00:2;
+               uint64_t rtt_wr_00:2;
+               uint64_t srt_00:1;
+               uint64_t asr_00:1;
+               uint64_t pasr_00:3;
+       } cn61xx;
+       struct cvmx_lmcx_modereg_params1_cn61xx cn63xx;
+       struct cvmx_lmcx_modereg_params1_cn61xx cn63xxp1;
+       struct cvmx_lmcx_modereg_params1_cn61xx cn66xx;
+       struct cvmx_lmcx_modereg_params1_cn61xx cn68xx;
+       struct cvmx_lmcx_modereg_params1_cn61xx cn68xxp1;
+       struct cvmx_lmcx_modereg_params1_cn61xx cn70xx;
+       struct cvmx_lmcx_modereg_params1_cn61xx cn70xxp1;
+       struct cvmx_lmcx_modereg_params1_s cn73xx;
+       struct cvmx_lmcx_modereg_params1_s cn78xx;
+       struct cvmx_lmcx_modereg_params1_s cn78xxp1;
+       struct cvmx_lmcx_modereg_params1_cn61xx cnf71xx;
+       struct cvmx_lmcx_modereg_params1_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_modereg_params2
+ *
+ * These parameters are written into the DDR4 mode registers.
+ *
+ */
+union cvmx_lmcx_modereg_params2 {
+       u64 u64;
+       struct cvmx_lmcx_modereg_params2_s {
+               uint64_t reserved_41_63:23;
+               uint64_t vrefdq_train_en:1;
+               uint64_t vref_range_11:1;
+               uint64_t vref_value_11:6;
+               uint64_t rtt_park_11:3;
+               uint64_t vref_range_10:1;
+               uint64_t vref_value_10:6;
+               uint64_t rtt_park_10:3;
+               uint64_t vref_range_01:1;
+               uint64_t vref_value_01:6;
+               uint64_t rtt_park_01:3;
+               uint64_t vref_range_00:1;
+               uint64_t vref_value_00:6;
+               uint64_t rtt_park_00:3;
+       } s;
+       struct cvmx_lmcx_modereg_params2_s cn70xx;
+       struct cvmx_lmcx_modereg_params2_cn70xxp1 {
+               uint64_t reserved_40_63:24;
+               uint64_t vref_range_11:1;
+               uint64_t vref_value_11:6;
+               uint64_t rtt_park_11:3;
+               uint64_t vref_range_10:1;
+               uint64_t vref_value_10:6;
+               uint64_t rtt_park_10:3;
+               uint64_t vref_range_01:1;
+               uint64_t vref_value_01:6;
+               uint64_t rtt_park_01:3;
+               uint64_t vref_range_00:1;
+               uint64_t vref_value_00:6;
+               uint64_t rtt_park_00:3;
+       } cn70xxp1;
+       struct cvmx_lmcx_modereg_params2_s cn73xx;
+       struct cvmx_lmcx_modereg_params2_s cn78xx;
+       struct cvmx_lmcx_modereg_params2_s cn78xxp1;
+       struct cvmx_lmcx_modereg_params2_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_modereg_params3
+ *
+ * These parameters are written into the DDR4 mode registers.
+ *
+ */
+union cvmx_lmcx_modereg_params3 {
+       u64 u64;
+       struct cvmx_lmcx_modereg_params3_s {
+               uint64_t reserved_39_63:25;
+               uint64_t xrank_add_tccd_l:3;
+               uint64_t xrank_add_tccd_s:3;
+               uint64_t mpr_fmt:2;
+               uint64_t wr_cmd_lat:2;
+               uint64_t fgrm:3;
+               uint64_t temp_sense:1;
+               uint64_t pda:1;
+               uint64_t gd:1;
+               uint64_t crc:1;
+               uint64_t lpasr:2;
+               uint64_t tccd_l:3;
+               uint64_t rd_dbi:1;
+               uint64_t wr_dbi:1;
+               uint64_t dm:1;
+               uint64_t ca_par_pers:1;
+               uint64_t odt_pd:1;
+               uint64_t par_lat_mode:3;
+               uint64_t wr_preamble:1;
+               uint64_t rd_preamble:1;
+               uint64_t sre_abort:1;
+               uint64_t cal:3;
+               uint64_t vref_mon:1;
+               uint64_t tc_ref:1;
+               uint64_t max_pd:1;
+       } s;
+       struct cvmx_lmcx_modereg_params3_cn70xx {
+               uint64_t reserved_33_63:31;
+               uint64_t mpr_fmt:2;
+               uint64_t wr_cmd_lat:2;
+               uint64_t fgrm:3;
+               uint64_t temp_sense:1;
+               uint64_t pda:1;
+               uint64_t gd:1;
+               uint64_t crc:1;
+               uint64_t lpasr:2;
+               uint64_t tccd_l:3;
+               uint64_t rd_dbi:1;
+               uint64_t wr_dbi:1;
+               uint64_t dm:1;
+               uint64_t ca_par_pers:1;
+               uint64_t odt_pd:1;
+               uint64_t par_lat_mode:3;
+               uint64_t wr_preamble:1;
+               uint64_t rd_preamble:1;
+               uint64_t sre_abort:1;
+               uint64_t cal:3;
+               uint64_t vref_mon:1;
+               uint64_t tc_ref:1;
+               uint64_t max_pd:1;
+       } cn70xx;
+       struct cvmx_lmcx_modereg_params3_cn70xx cn70xxp1;
+       struct cvmx_lmcx_modereg_params3_s cn73xx;
+       struct cvmx_lmcx_modereg_params3_s cn78xx;
+       struct cvmx_lmcx_modereg_params3_s cn78xxp1;
+       struct cvmx_lmcx_modereg_params3_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_mpr_data0
+ *
+ * This register provides bits <63:0> of the MPR data register.
+ *
+ */
+union cvmx_lmcx_mpr_data0 {
+       u64 u64;
+       struct cvmx_lmcx_mpr_data0_s {
+               uint64_t mpr_data:64;
+       } s;
+       struct cvmx_lmcx_mpr_data0_s cn70xx;
+       struct cvmx_lmcx_mpr_data0_s cn70xxp1;
+       struct cvmx_lmcx_mpr_data0_s cn73xx;
+       struct cvmx_lmcx_mpr_data0_s cn78xx;
+       struct cvmx_lmcx_mpr_data0_s cn78xxp1;
+       struct cvmx_lmcx_mpr_data0_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_mpr_data1
+ *
+ * This register provides bits <127:64> of the MPR data register.
+ *
+ */
+union cvmx_lmcx_mpr_data1 {
+       u64 u64;
+       struct cvmx_lmcx_mpr_data1_s {
+               uint64_t mpr_data:64;
+       } s;
+       struct cvmx_lmcx_mpr_data1_s cn70xx;
+       struct cvmx_lmcx_mpr_data1_s cn70xxp1;
+       struct cvmx_lmcx_mpr_data1_s cn73xx;
+       struct cvmx_lmcx_mpr_data1_s cn78xx;
+       struct cvmx_lmcx_mpr_data1_s cn78xxp1;
+       struct cvmx_lmcx_mpr_data1_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_mpr_data2
+ *
+ * This register provides bits <143:128> of the MPR data register.
+ *
+ */
+union cvmx_lmcx_mpr_data2 {
+       u64 u64;
+       struct cvmx_lmcx_mpr_data2_s {
+               uint64_t reserved_16_63:48;
+               uint64_t mpr_data:16;
+       } s;
+       struct cvmx_lmcx_mpr_data2_s cn70xx;
+       struct cvmx_lmcx_mpr_data2_s cn70xxp1;
+       struct cvmx_lmcx_mpr_data2_s cn73xx;
+       struct cvmx_lmcx_mpr_data2_s cn78xx;
+       struct cvmx_lmcx_mpr_data2_s cn78xxp1;
+       struct cvmx_lmcx_mpr_data2_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_mr_mpr_ctl
+ *
+ * This register provides the control functions when programming the MPR
+ * of DDR4 DRAMs.
+ *
+ */
+union cvmx_lmcx_mr_mpr_ctl {
+       u64 u64;
+       struct cvmx_lmcx_mr_mpr_ctl_s {
+               uint64_t reserved_61_63:3;
+               uint64_t mr_wr_secure_key_ena:1;
+               uint64_t pba_func_space:3;
+               uint64_t mr_wr_bg1:1;
+               uint64_t mpr_sample_dq_enable:1;
+               uint64_t pda_early_dqx:1;
+               uint64_t mr_wr_pba_enable:1;
+               uint64_t mr_wr_use_default_value:1;
+               uint64_t mpr_whole_byte_enable:1;
+               uint64_t mpr_byte_select:4;
+               uint64_t mpr_bit_select:2;
+               uint64_t mpr_wr:1;
+               uint64_t mpr_loc:2;
+               uint64_t mr_wr_pda_enable:1;
+               uint64_t mr_wr_pda_mask:18;
+               uint64_t mr_wr_rank:2;
+               uint64_t mr_wr_sel:3;
+               uint64_t mr_wr_addr:18;
+       } s;
+       struct cvmx_lmcx_mr_mpr_ctl_cn70xx {
+               uint64_t reserved_52_63:12;
+               uint64_t mpr_whole_byte_enable:1;
+               uint64_t mpr_byte_select:4;
+               uint64_t mpr_bit_select:2;
+               uint64_t mpr_wr:1;
+               uint64_t mpr_loc:2;
+               uint64_t mr_wr_pda_enable:1;
+               uint64_t mr_wr_pda_mask:18;
+               uint64_t mr_wr_rank:2;
+               uint64_t mr_wr_sel:3;
+               uint64_t mr_wr_addr:18;
+       } cn70xx;
+       struct cvmx_lmcx_mr_mpr_ctl_cn70xx cn70xxp1;
+       struct cvmx_lmcx_mr_mpr_ctl_s cn73xx;
+       struct cvmx_lmcx_mr_mpr_ctl_s cn78xx;
+       struct cvmx_lmcx_mr_mpr_ctl_s cn78xxp1;
+       struct cvmx_lmcx_mr_mpr_ctl_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_ns_ctl
+ *
+ * This register contains control parameters for handling nonsecure accesses.
+ *
+ */
+union cvmx_lmcx_ns_ctl {
+       u64 u64;
+       struct cvmx_lmcx_ns_ctl_s {
+               uint64_t reserved_26_63:38;
+               uint64_t ns_scramble_dis:1;
+               uint64_t reserved_18_24:7;
+               uint64_t adr_offset:18;
+       } s;
+       struct cvmx_lmcx_ns_ctl_s cn73xx;
+       struct cvmx_lmcx_ns_ctl_s cn78xx;
+       struct cvmx_lmcx_ns_ctl_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_nxm
+ *
+ * Following is the decoding for mem_msb/rank:
+ * 0x0: mem_msb = mem_adr[25].
+ * 0x1: mem_msb = mem_adr[26].
+ * 0x2: mem_msb = mem_adr[27].
+ * 0x3: mem_msb = mem_adr[28].
+ * 0x4: mem_msb = mem_adr[29].
+ * 0x5: mem_msb = mem_adr[30].
+ * 0x6: mem_msb = mem_adr[31].
+ * 0x7: mem_msb = mem_adr[32].
+ * 0x8: mem_msb = mem_adr[33].
+ * 0x9: mem_msb = mem_adr[34].
+ * 0xA: mem_msb = mem_adr[35].
+ * 0xB: mem_msb = mem_adr[36].
+ * 0xC-0xF = Reserved.
+ *
+ * For example, for a DIMM made of Samsung's K4B1G0846C-ZCF7 1Gb
+ * (16M * 8 bit * 8 bank) parts, the column address width = 10; so with
+ * 10b of col, 3b of bus, 3b of bank, row_lsb = 16.
+ * Therefore, row = mem_adr[29:16] and mem_msb = 4.
+ *
+ * Note that addresses greater than the maximum defined space (pbank_msb)
+ * are also treated as NXM accesses.
+ */
+union cvmx_lmcx_nxm {
+       u64 u64;
+       struct cvmx_lmcx_nxm_s {
+               uint64_t reserved_40_63:24;
+               uint64_t mem_msb_d3_r1:4;
+               uint64_t mem_msb_d3_r0:4;
+               uint64_t mem_msb_d2_r1:4;
+               uint64_t mem_msb_d2_r0:4;
+               uint64_t mem_msb_d1_r1:4;
+               uint64_t mem_msb_d1_r0:4;
+               uint64_t mem_msb_d0_r1:4;
+               uint64_t mem_msb_d0_r0:4;
+               uint64_t cs_mask:8;
+       } s;
+       struct cvmx_lmcx_nxm_cn52xx {
+               uint64_t reserved_8_63:56;
+               uint64_t cs_mask:8;
+       } cn52xx;
+       struct cvmx_lmcx_nxm_cn52xx cn56xx;
+       struct cvmx_lmcx_nxm_cn52xx cn58xx;
+       struct cvmx_lmcx_nxm_s cn61xx;
+       struct cvmx_lmcx_nxm_s cn63xx;
+       struct cvmx_lmcx_nxm_s cn63xxp1;
+       struct cvmx_lmcx_nxm_s cn66xx;
+       struct cvmx_lmcx_nxm_s cn68xx;
+       struct cvmx_lmcx_nxm_s cn68xxp1;
+       struct cvmx_lmcx_nxm_cn70xx {
+               uint64_t reserved_24_63:40;
+               uint64_t mem_msb_d1_r1:4;
+               uint64_t mem_msb_d1_r0:4;
+               uint64_t mem_msb_d0_r1:4;
+               uint64_t mem_msb_d0_r0:4;
+               uint64_t reserved_4_7:4;
+               uint64_t cs_mask:4;
+       } cn70xx;
+       struct cvmx_lmcx_nxm_cn70xx cn70xxp1;
+       struct cvmx_lmcx_nxm_cn70xx cn73xx;
+       struct cvmx_lmcx_nxm_cn70xx cn78xx;
+       struct cvmx_lmcx_nxm_cn70xx cn78xxp1;
+       struct cvmx_lmcx_nxm_s cnf71xx;
+       struct cvmx_lmcx_nxm_cn70xx cnf75xx;
+};
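+
+/*
+ * Hypothetical helper, illustration only: converts the most-significant
+ * address bit of a rank (e.g. mem_adr[29] in the example above) into the
+ * MEM_MSB_D*_R* encoding, where 0x0 maps to mem_adr[25] and 0xB to
+ * mem_adr[36]. Returns -1 for bits outside the encodable range.
+ */
+static inline int cvmx_lmcx_nxm_msb_encode(unsigned int mem_adr_bit)
+{
+       if (mem_adr_bit < 25 || mem_adr_bit > 36)
+               return -1;              /* 0xC-0xF are reserved encodings */
+       return mem_adr_bit - 25;        /* e.g. mem_adr[29] -> 0x4 */
+}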
+
+/**
+ * cvmx_lmc#_nxm_fadr
+ *
+ * This register captures only the first transaction with an NXM error while
+ * an interrupt is pending, and only captures a subsequent event once the
+ * interrupt is cleared by writing a one to LMC()_INT[NXM_ERR]. It captures
+ * the actual L2C-LMC address provided to the LMC that caused the NXM error.
+ * A read or write NXM error is captured only if enabled using the NXM
+ * event enables.
+ */
+union cvmx_lmcx_nxm_fadr {
+       u64 u64;
+       struct cvmx_lmcx_nxm_fadr_s {
+               uint64_t reserved_40_63:24;
+               uint64_t nxm_faddr_ext:1;
+               uint64_t nxm_src:1;
+               uint64_t nxm_type:1;
+               uint64_t nxm_faddr:37;
+       } s;
+       struct cvmx_lmcx_nxm_fadr_cn70xx {
+               uint64_t reserved_39_63:25;
+               uint64_t nxm_src:1;
+               uint64_t nxm_type:1;
+               uint64_t nxm_faddr:37;
+       } cn70xx;
+       struct cvmx_lmcx_nxm_fadr_cn70xx cn70xxp1;
+       struct cvmx_lmcx_nxm_fadr_s cn73xx;
+       struct cvmx_lmcx_nxm_fadr_s cn78xx;
+       struct cvmx_lmcx_nxm_fadr_s cn78xxp1;
+       struct cvmx_lmcx_nxm_fadr_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_ops_cnt
+ *
+ * LMC_OPS_CNT  = Performance Counters
+ *
+ */
+union cvmx_lmcx_ops_cnt {
+       u64 u64;
+       struct cvmx_lmcx_ops_cnt_s {
+               uint64_t opscnt:64;
+       } s;
+       struct cvmx_lmcx_ops_cnt_s cn61xx;
+       struct cvmx_lmcx_ops_cnt_s cn63xx;
+       struct cvmx_lmcx_ops_cnt_s cn63xxp1;
+       struct cvmx_lmcx_ops_cnt_s cn66xx;
+       struct cvmx_lmcx_ops_cnt_s cn68xx;
+       struct cvmx_lmcx_ops_cnt_s cn68xxp1;
+       struct cvmx_lmcx_ops_cnt_s cn70xx;
+       struct cvmx_lmcx_ops_cnt_s cn70xxp1;
+       struct cvmx_lmcx_ops_cnt_s cn73xx;
+       struct cvmx_lmcx_ops_cnt_s cn78xx;
+       struct cvmx_lmcx_ops_cnt_s cn78xxp1;
+       struct cvmx_lmcx_ops_cnt_s cnf71xx;
+       struct cvmx_lmcx_ops_cnt_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_ops_cnt_hi
+ *
+ * LMC_OPS_CNT_HI  = Performance Counters
+ *
+ */
+union cvmx_lmcx_ops_cnt_hi {
+       u64 u64;
+       struct cvmx_lmcx_ops_cnt_hi_s {
+               uint64_t reserved_32_63:32;
+               uint64_t opscnt_hi:32;
+       } s;
+       struct cvmx_lmcx_ops_cnt_hi_s cn30xx;
+       struct cvmx_lmcx_ops_cnt_hi_s cn31xx;
+       struct cvmx_lmcx_ops_cnt_hi_s cn38xx;
+       struct cvmx_lmcx_ops_cnt_hi_s cn38xxp2;
+       struct cvmx_lmcx_ops_cnt_hi_s cn50xx;
+       struct cvmx_lmcx_ops_cnt_hi_s cn52xx;
+       struct cvmx_lmcx_ops_cnt_hi_s cn52xxp1;
+       struct cvmx_lmcx_ops_cnt_hi_s cn56xx;
+       struct cvmx_lmcx_ops_cnt_hi_s cn56xxp1;
+       struct cvmx_lmcx_ops_cnt_hi_s cn58xx;
+       struct cvmx_lmcx_ops_cnt_hi_s cn58xxp1;
+};
+
+/**
+ * cvmx_lmc#_ops_cnt_lo
+ *
+ * LMC_OPS_CNT_LO  = Performance Counters
+ *
+ */
+union cvmx_lmcx_ops_cnt_lo {
+       u64 u64;
+       struct cvmx_lmcx_ops_cnt_lo_s {
+               uint64_t reserved_32_63:32;
+               uint64_t opscnt_lo:32;
+       } s;
+       struct cvmx_lmcx_ops_cnt_lo_s cn30xx;
+       struct cvmx_lmcx_ops_cnt_lo_s cn31xx;
+       struct cvmx_lmcx_ops_cnt_lo_s cn38xx;
+       struct cvmx_lmcx_ops_cnt_lo_s cn38xxp2;
+       struct cvmx_lmcx_ops_cnt_lo_s cn50xx;
+       struct cvmx_lmcx_ops_cnt_lo_s cn52xx;
+       struct cvmx_lmcx_ops_cnt_lo_s cn52xxp1;
+       struct cvmx_lmcx_ops_cnt_lo_s cn56xx;
+       struct cvmx_lmcx_ops_cnt_lo_s cn56xxp1;
+       struct cvmx_lmcx_ops_cnt_lo_s cn58xx;
+       struct cvmx_lmcx_ops_cnt_lo_s cn58xxp1;
+};
+
+/**
+ * cvmx_lmc#_phy_ctl
+ *
+ * LMC_PHY_CTL = LMC PHY Control
+ *
+ */
+union cvmx_lmcx_phy_ctl {
+       u64 u64;
+       struct cvmx_lmcx_phy_ctl_s {
+               uint64_t reserved_61_63:3;
+               uint64_t dsk_dbg_load_dis:1;
+               uint64_t dsk_dbg_overwrt_ena:1;
+               uint64_t dsk_dbg_wr_mode:1;
+               uint64_t data_rate_loopback:1;
+               uint64_t dq_shallow_loopback:1;
+               uint64_t dm_disable:1;
+               uint64_t c1_sel:2;
+               uint64_t c0_sel:2;
+               uint64_t phy_reset:1;
+               uint64_t dsk_dbg_rd_complete:1;
+               uint64_t dsk_dbg_rd_data:10;
+               uint64_t dsk_dbg_rd_start:1;
+               uint64_t dsk_dbg_clk_scaler:2;
+               uint64_t dsk_dbg_offset:2;
+               uint64_t dsk_dbg_num_bits_sel:1;
+               uint64_t dsk_dbg_byte_sel:4;
+               uint64_t dsk_dbg_bit_sel:4;
+               uint64_t dbi_mode_ena:1;
+               uint64_t ddr_error_n_ena:1;
+               uint64_t ref_pin_on:1;
+               uint64_t dac_on:1;
+               uint64_t int_pad_loopback_ena:1;
+               uint64_t int_phy_loopback_ena:1;
+               uint64_t phy_dsk_reset:1;
+               uint64_t phy_dsk_byp:1;
+               uint64_t phy_pwr_save_disable:1;
+               uint64_t ten:1;
+               uint64_t rx_always_on:1;
+               uint64_t lv_mode:1;
+               uint64_t ck_tune1:1;
+               uint64_t ck_dlyout1:4;
+               uint64_t ck_tune0:1;
+               uint64_t ck_dlyout0:4;
+               uint64_t loopback:1;
+               uint64_t loopback_pos:1;
+               uint64_t ts_stagger:1;
+       } s;
+       struct cvmx_lmcx_phy_ctl_cn61xx {
+               uint64_t reserved_15_63:49;
+               uint64_t rx_always_on:1;
+               uint64_t lv_mode:1;
+               uint64_t ck_tune1:1;
+               uint64_t ck_dlyout1:4;
+               uint64_t ck_tune0:1;
+               uint64_t ck_dlyout0:4;
+               uint64_t loopback:1;
+               uint64_t loopback_pos:1;
+               uint64_t ts_stagger:1;
+       } cn61xx;
+       struct cvmx_lmcx_phy_ctl_cn61xx cn63xx;
+       struct cvmx_lmcx_phy_ctl_cn63xxp1 {
+               uint64_t reserved_14_63:50;
+               uint64_t lv_mode:1;
+               uint64_t ck_tune1:1;
+               uint64_t ck_dlyout1:4;
+               uint64_t ck_tune0:1;
+               uint64_t ck_dlyout0:4;
+               uint64_t loopback:1;
+               uint64_t loopback_pos:1;
+               uint64_t ts_stagger:1;
+       } cn63xxp1;
+       struct cvmx_lmcx_phy_ctl_cn61xx cn66xx;
+       struct cvmx_lmcx_phy_ctl_cn61xx cn68xx;
+       struct cvmx_lmcx_phy_ctl_cn61xx cn68xxp1;
+       struct cvmx_lmcx_phy_ctl_cn70xx {
+               uint64_t reserved_51_63:13;
+               uint64_t phy_reset:1;
+               uint64_t dsk_dbg_rd_complete:1;
+               uint64_t dsk_dbg_rd_data:10;
+               uint64_t dsk_dbg_rd_start:1;
+               uint64_t dsk_dbg_clk_scaler:2;
+               uint64_t dsk_dbg_offset:2;
+               uint64_t dsk_dbg_num_bits_sel:1;
+               uint64_t dsk_dbg_byte_sel:4;
+               uint64_t dsk_dbg_bit_sel:4;
+               uint64_t dbi_mode_ena:1;
+               uint64_t ddr_error_n_ena:1;
+               uint64_t ref_pin_on:1;
+               uint64_t dac_on:1;
+               uint64_t int_pad_loopback_ena:1;
+               uint64_t int_phy_loopback_ena:1;
+               uint64_t phy_dsk_reset:1;
+               uint64_t phy_dsk_byp:1;
+               uint64_t phy_pwr_save_disable:1;
+               uint64_t ten:1;
+               uint64_t rx_always_on:1;
+               uint64_t lv_mode:1;
+               uint64_t ck_tune1:1;
+               uint64_t ck_dlyout1:4;
+               uint64_t ck_tune0:1;
+               uint64_t ck_dlyout0:4;
+               uint64_t loopback:1;
+               uint64_t loopback_pos:1;
+               uint64_t ts_stagger:1;
+       } cn70xx;
+       struct cvmx_lmcx_phy_ctl_cn70xx cn70xxp1;
+       struct cvmx_lmcx_phy_ctl_cn73xx {
+               uint64_t reserved_58_63:6;
+               uint64_t data_rate_loopback:1;
+               uint64_t dq_shallow_loopback:1;
+               uint64_t dm_disable:1;
+               uint64_t c1_sel:2;
+               uint64_t c0_sel:2;
+               uint64_t phy_reset:1;
+               uint64_t dsk_dbg_rd_complete:1;
+               uint64_t dsk_dbg_rd_data:10;
+               uint64_t dsk_dbg_rd_start:1;
+               uint64_t dsk_dbg_clk_scaler:2;
+               uint64_t dsk_dbg_offset:2;
+               uint64_t dsk_dbg_num_bits_sel:1;
+               uint64_t dsk_dbg_byte_sel:4;
+               uint64_t dsk_dbg_bit_sel:4;
+               uint64_t dbi_mode_ena:1;
+               uint64_t ddr_error_n_ena:1;
+               uint64_t ref_pin_on:1;
+               uint64_t dac_on:1;
+               uint64_t int_pad_loopback_ena:1;
+               uint64_t int_phy_loopback_ena:1;
+               uint64_t phy_dsk_reset:1;
+               uint64_t phy_dsk_byp:1;
+               uint64_t phy_pwr_save_disable:1;
+               uint64_t ten:1;
+               uint64_t rx_always_on:1;
+               uint64_t lv_mode:1;
+               uint64_t ck_tune1:1;
+               uint64_t ck_dlyout1:4;
+               uint64_t ck_tune0:1;
+               uint64_t ck_dlyout0:4;
+               uint64_t loopback:1;
+               uint64_t loopback_pos:1;
+               uint64_t ts_stagger:1;
+       } cn73xx;
+       struct cvmx_lmcx_phy_ctl_s cn78xx;
+       struct cvmx_lmcx_phy_ctl_s cn78xxp1;
+       struct cvmx_lmcx_phy_ctl_cn61xx cnf71xx;
+       struct cvmx_lmcx_phy_ctl_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_phy_ctl2
+ */
+union cvmx_lmcx_phy_ctl2 {
+       u64 u64;
+       struct cvmx_lmcx_phy_ctl2_s {
+               uint64_t reserved_27_63:37;
+               uint64_t dqs8_dsk_adj:3;
+               uint64_t dqs7_dsk_adj:3;
+               uint64_t dqs6_dsk_adj:3;
+               uint64_t dqs5_dsk_adj:3;
+               uint64_t dqs4_dsk_adj:3;
+               uint64_t dqs3_dsk_adj:3;
+               uint64_t dqs2_dsk_adj:3;
+               uint64_t dqs1_dsk_adj:3;
+               uint64_t dqs0_dsk_adj:3;
+       } s;
+       struct cvmx_lmcx_phy_ctl2_s cn78xx;
+       struct cvmx_lmcx_phy_ctl2_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_pll_bwctl
+ *
+ * LMC_PLL_BWCTL  = DDR PLL Bandwidth Control Register
+ *
+ */
+union cvmx_lmcx_pll_bwctl {
+       u64 u64;
+       struct cvmx_lmcx_pll_bwctl_s {
+               uint64_t reserved_5_63:59;
+               uint64_t bwupd:1;
+               uint64_t bwctl:4;
+       } s;
+       struct cvmx_lmcx_pll_bwctl_s cn30xx;
+       struct cvmx_lmcx_pll_bwctl_s cn31xx;
+       struct cvmx_lmcx_pll_bwctl_s cn38xx;
+       struct cvmx_lmcx_pll_bwctl_s cn38xxp2;
+};
+
+/**
+ * cvmx_lmc#_pll_ctl
+ *
+ * LMC_PLL_CTL = LMC pll control
+ *
+ *
+ * Notes:
+ * This CSR is only relevant for LMC0. LMC1_PLL_CTL is not used.
+ *
+ * Exactly one of EN2, EN4, EN6, EN8, EN12, EN16 must be set.
+ *
+ * The resultant DDR_CK frequency is the DDR2_REF_CLK
+ * frequency multiplied by:
+ *
+ *     (CLKF + 1) / ((CLKR + 1) * EN(2,4,6,8,12,16))
+ *
+ * The PLL frequency, which is:
+ *
+ *     (DDR2_REF_CLK freq) * ((CLKF + 1) / (CLKR + 1))
+ *
+ * must reside between 1.2 and 2.5 GHz. A faster PLL frequency is
+ * desirable if there is a choice.
+ */
+union cvmx_lmcx_pll_ctl {
+       u64 u64;
+       struct cvmx_lmcx_pll_ctl_s {
+               uint64_t reserved_30_63:34;
+               uint64_t bypass:1;
+               uint64_t fasten_n:1;
+               uint64_t div_reset:1;
+               uint64_t reset_n:1;
+               uint64_t clkf:12;
+               uint64_t clkr:6;
+               uint64_t reserved_6_7:2;
+               uint64_t en16:1;
+               uint64_t en12:1;
+               uint64_t en8:1;
+               uint64_t en6:1;
+               uint64_t en4:1;
+               uint64_t en2:1;
+       } s;
+       struct cvmx_lmcx_pll_ctl_cn50xx {
+               uint64_t reserved_29_63:35;
+               uint64_t fasten_n:1;
+               uint64_t div_reset:1;
+               uint64_t reset_n:1;
+               uint64_t clkf:12;
+               uint64_t clkr:6;
+               uint64_t reserved_6_7:2;
+               uint64_t en16:1;
+               uint64_t en12:1;
+               uint64_t en8:1;
+               uint64_t en6:1;
+               uint64_t en4:1;
+               uint64_t en2:1;
+       } cn50xx;
+       struct cvmx_lmcx_pll_ctl_s cn52xx;
+       struct cvmx_lmcx_pll_ctl_s cn52xxp1;
+       struct cvmx_lmcx_pll_ctl_cn50xx cn56xx;
+       struct cvmx_lmcx_pll_ctl_cn56xxp1 {
+               uint64_t reserved_28_63:36;
+               uint64_t div_reset:1;
+               uint64_t reset_n:1;
+               uint64_t clkf:12;
+               uint64_t clkr:6;
+               uint64_t reserved_6_7:2;
+               uint64_t en16:1;
+               uint64_t en12:1;
+               uint64_t en8:1;
+               uint64_t en6:1;
+               uint64_t en4:1;
+               uint64_t en2:1;
+       } cn56xxp1;
+       struct cvmx_lmcx_pll_ctl_cn56xxp1 cn58xx;
+       struct cvmx_lmcx_pll_ctl_cn56xxp1 cn58xxp1;
+};
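+
+/*
+ * Illustrative sketch, not part of this patch: given CLKF, CLKR and the
+ * single enabled EN* divider (2, 4, 6, 8, 12 or 16) described in the note
+ * above, compute the PLL and DDR_CK frequencies and check the documented
+ * 1.2-2.5 GHz PLL range. The helper name and plain-integer interface are
+ * assumptions made for illustration only.
+ */
+static inline int lmcx_pll_ddr_ck_hz(uint64_t ref_clk_hz, unsigned int clkf,
+                                     unsigned int clkr, unsigned int en_div,
+                                     uint64_t *ddr_ck_hz)
+{
+       /* PLL output = DDR2_REF_CLK * (CLKF + 1) / (CLKR + 1) */
+       uint64_t pll_hz = ref_clk_hz * (clkf + 1) / (clkr + 1);
+
+       if (pll_hz < 1200000000ULL || pll_hz > 2500000000ULL)
+               return -1;      /* outside the documented PLL range */
+
+       /* DDR_CK = PLL output divided by the enabled EN(2,4,6,8,12,16) value */
+       *ddr_ck_hz = pll_hz / en_div;
+       return 0;
+}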
+
+/**
+ * cvmx_lmc#_pll_status
+ *
+ * LMC_PLL_STATUS = LMC pll status
+ *
+ */
+union cvmx_lmcx_pll_status {
+       u64 u64;
+       struct cvmx_lmcx_pll_status_s {
+               uint64_t reserved_32_63:32;
+               uint64_t ddr__nctl:5;
+               uint64_t ddr__pctl:5;
+               uint64_t reserved_2_21:20;
+               uint64_t rfslip:1;
+               uint64_t fbslip:1;
+       } s;
+       struct cvmx_lmcx_pll_status_s cn50xx;
+       struct cvmx_lmcx_pll_status_s cn52xx;
+       struct cvmx_lmcx_pll_status_s cn52xxp1;
+       struct cvmx_lmcx_pll_status_s cn56xx;
+       struct cvmx_lmcx_pll_status_s cn56xxp1;
+       struct cvmx_lmcx_pll_status_s cn58xx;
+       struct cvmx_lmcx_pll_status_cn58xxp1 {
+               uint64_t reserved_2_63:62;
+               uint64_t rfslip:1;
+               uint64_t fbslip:1;
+       } cn58xxp1;
+};
+
+/**
+ * cvmx_lmc#_ppr_ctl
+ *
+ * This register contains programmable timing and control parameters used
+ * when running the post package repair sequence. The timing fields
+ * PPR_CTL[TPGMPST], PPR_CTL[TPGM_EXIT] and PPR_CTL[TPGM] need to be set as
+ * to satisfy the minimum values mentioned in the JEDEC DDR4 spec before
+ * running the PPR sequence. See LMC()_SEQ_CTL[SEQ_SEL,INIT_START] to run
+ * the PPR sequence.
+ *
+ * Running hard PPR may require LMC to issue security key as four consecutive
+ * MR0 commands, each with a unique address field A[17:0]. Set the security
+ * key in the general purpose CSRs as follows:
+ *
+ * _ Security key 0 = LMC()_GENERAL_PURPOSE0[DATA]<17:0>.
+ * _ Security key 1 = LMC()_GENERAL_PURPOSE0[DATA]<35:18>.
+ * _ Security key 2 = LMC()_GENERAL_PURPOSE1[DATA]<17:0>.
+ * _ Security key 3 = LMC()_GENERAL_PURPOSE1[DATA]<35:18>.
+ */
+union cvmx_lmcx_ppr_ctl {
+       u64 u64;
+       struct cvmx_lmcx_ppr_ctl_s {
+               uint64_t reserved_27_63:37;
+               uint64_t lrank_sel:3;
+               uint64_t skip_issue_security:1;
+               uint64_t sppr:1;
+               uint64_t tpgm:10;
+               uint64_t tpgm_exit:5;
+               uint64_t tpgmpst:7;
+       } s;
+       struct cvmx_lmcx_ppr_ctl_cn73xx {
+               uint64_t reserved_24_63:40;
+               uint64_t skip_issue_security:1;
+               uint64_t sppr:1;
+               uint64_t tpgm:10;
+               uint64_t tpgm_exit:5;
+               uint64_t tpgmpst:7;
+       } cn73xx;
+       struct cvmx_lmcx_ppr_ctl_s cn78xx;
+       struct cvmx_lmcx_ppr_ctl_cn73xx cnf75xx;
+};
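+
+/*
+ * Illustrative sketch, not part of this patch: split the hard-PPR security
+ * key into the four 18-bit pieces listed in the note above, given the DATA
+ * values destined for LMC()_GENERAL_PURPOSE0/1. The function name and
+ * interface are assumptions made for illustration only.
+ */
+static inline void lmcx_ppr_security_keys(uint64_t gp0_data, uint64_t gp1_data,
+                                          unsigned int key[4])
+{
+       key[0] = gp0_data & 0x3ffff;            /* GENERAL_PURPOSE0[DATA]<17:0>  */
+       key[1] = (gp0_data >> 18) & 0x3ffff;    /* GENERAL_PURPOSE0[DATA]<35:18> */
+       key[2] = gp1_data & 0x3ffff;            /* GENERAL_PURPOSE1[DATA]<17:0>  */
+       key[3] = (gp1_data >> 18) & 0x3ffff;    /* GENERAL_PURPOSE1[DATA]<35:18> */
+}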
+
+/**
+ * cvmx_lmc#_read_level_ctl
+ *
+ * Notes:
+ * The HW writes and reads the cache block selected by ROW, COL, BNK and
+ * the rank as part of a read-leveling sequence for a rank.
+ * A cache block write is 16 72-bit words. PATTERN selects the write value.
+ * For the first 8 words, the write value is the bit PATTERN<i> duplicated
+ * into a 72-bit vector. The write value of the last 8 words is the inverse
+ * of the write value of the first 8 words. See LMC*_READ_LEVEL_RANK*.
+ */
+union cvmx_lmcx_read_level_ctl {
+       u64 u64;
+       struct cvmx_lmcx_read_level_ctl_s {
+               uint64_t reserved_44_63:20;
+               uint64_t rankmask:4;
+               uint64_t pattern:8;
+               uint64_t row:16;
+               uint64_t col:12;
+               uint64_t reserved_3_3:1;
+               uint64_t bnk:3;
+       } s;
+       struct cvmx_lmcx_read_level_ctl_s cn52xx;
+       struct cvmx_lmcx_read_level_ctl_s cn52xxp1;
+       struct cvmx_lmcx_read_level_ctl_s cn56xx;
+       struct cvmx_lmcx_read_level_ctl_s cn56xxp1;
+};
+
+/**
+ * cvmx_lmc#_read_level_dbg
+ *
+ * Notes:
+ * A given read of LMC*_READ_LEVEL_DBG returns the read-leveling pass/fail
+ * results for all possible delay settings (i.e. the BITMASK) for only one
+ * byte in the last rank that the HW read-leveled.
+ * LMC*_READ_LEVEL_DBG[BYTE] selects the particular byte.
+ * To get these pass/fail results for a different rank, you must run
+ * the hardware read-leveling again. For example, it is possible to get the
+ * BITMASK results for every byte of every rank if you run read-leveling
+ * separately for each rank, probing LMC*_READ_LEVEL_DBG between each
+ * read-leveling.
+ */
+union cvmx_lmcx_read_level_dbg {
+       u64 u64;
+       struct cvmx_lmcx_read_level_dbg_s {
+               uint64_t reserved_32_63:32;
+               uint64_t bitmask:16;
+               uint64_t reserved_4_15:12;
+               uint64_t byte:4;
+       } s;
+       struct cvmx_lmcx_read_level_dbg_s cn52xx;
+       struct cvmx_lmcx_read_level_dbg_s cn52xxp1;
+       struct cvmx_lmcx_read_level_dbg_s cn56xx;
+       struct cvmx_lmcx_read_level_dbg_s cn56xxp1;
+};
+
+/**
+ * cvmx_lmc#_read_level_rank#
+ *
+ * Notes:
+ * This is four CSRs per LMC, one per each rank.
+ * Each CSR is written by HW during a read-leveling sequence for the rank.
+ * (HW sets STATUS==3 after HW read-leveling completes for the rank.)
+ * Each CSR may also be written by SW, but not while a read-leveling sequence
+ * is in progress. (HW sets STATUS==1 after a CSR write.)
+ * Deskew setting is measured in units of 1/4 DCLK, so the above BYTE*
+ * values can range over 4 DCLKs.
+ * SW initiates a HW read-leveling sequence by programming
+ * LMC*_READ_LEVEL_CTL and writing INIT_START=1 with SEQUENCE=1.
+ * See LMC*_READ_LEVEL_CTL.
+ */
+union cvmx_lmcx_read_level_rankx {
+       u64 u64;
+       struct cvmx_lmcx_read_level_rankx_s {
+               uint64_t reserved_38_63:26;
+               uint64_t status:2;
+               uint64_t byte8:4;
+               uint64_t byte7:4;
+               uint64_t byte6:4;
+               uint64_t byte5:4;
+               uint64_t byte4:4;
+               uint64_t byte3:4;
+               uint64_t byte2:4;
+               uint64_t byte1:4;
+               uint64_t byte0:4;
+       } s;
+       struct cvmx_lmcx_read_level_rankx_s cn52xx;
+       struct cvmx_lmcx_read_level_rankx_s cn52xxp1;
+       struct cvmx_lmcx_read_level_rankx_s cn56xx;
+       struct cvmx_lmcx_read_level_rankx_s cn56xxp1;
+};
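+
+/*
+ * Illustrative sketch, not part of this patch: decode the per-rank
+ * read-leveling result using the union above. The caller passes the raw
+ * value read from the LMC*_READ_LEVEL_RANK* CSR; how that read is done is
+ * left to the platform's CSR accessor.
+ */
+static inline int lmcx_read_level_complete(uint64_t csr_value)
+{
+       union cvmx_lmcx_read_level_rankx rl;
+
+       rl.u64 = csr_value;
+       return rl.s.status == 3;        /* 3: HW read-leveling done for this rank */
+}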
+
+/**
+ * cvmx_lmc#_ref_status
+ *
+ * This register contains the status of the refresh pending counter.
+ *
+ */
+union cvmx_lmcx_ref_status {
+       u64 u64;
+       struct cvmx_lmcx_ref_status_s {
+               uint64_t reserved_4_63:60;
+               uint64_t ref_pend_max_clr:1;
+               uint64_t ref_count:3;
+       } s;
+       struct cvmx_lmcx_ref_status_s cn73xx;
+       struct cvmx_lmcx_ref_status_s cn78xx;
+       struct cvmx_lmcx_ref_status_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_reset_ctl
+ *
+ * Specify the RSL base addresses for the block.
+ *
+ */
+union cvmx_lmcx_reset_ctl {
+       u64 u64;
+       struct cvmx_lmcx_reset_ctl_s {
+               uint64_t reserved_4_63:60;
+               uint64_t ddr3psv:1;
+               uint64_t ddr3psoft:1;
+               uint64_t ddr3pwarm:1;
+               uint64_t ddr3rst:1;
+       } s;
+       struct cvmx_lmcx_reset_ctl_s cn61xx;
+       struct cvmx_lmcx_reset_ctl_s cn63xx;
+       struct cvmx_lmcx_reset_ctl_s cn63xxp1;
+       struct cvmx_lmcx_reset_ctl_s cn66xx;
+       struct cvmx_lmcx_reset_ctl_s cn68xx;
+       struct cvmx_lmcx_reset_ctl_s cn68xxp1;
+       struct cvmx_lmcx_reset_ctl_s cn70xx;
+       struct cvmx_lmcx_reset_ctl_s cn70xxp1;
+       struct cvmx_lmcx_reset_ctl_s cn73xx;
+       struct cvmx_lmcx_reset_ctl_s cn78xx;
+       struct cvmx_lmcx_reset_ctl_s cn78xxp1;
+       struct cvmx_lmcx_reset_ctl_s cnf71xx;
+       struct cvmx_lmcx_reset_ctl_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_retry_config
+ *
+ * This register configures automatic retry operation.
+ *
+ */
+union cvmx_lmcx_retry_config {
+       u64 u64;
+       struct cvmx_lmcx_retry_config_s {
+               uint64_t reserved_56_63:8;
+               uint64_t max_errors:24;
+               uint64_t reserved_13_31:19;
+               uint64_t error_continue:1;
+               uint64_t reserved_9_11:3;
+               uint64_t auto_error_continue:1;
+               uint64_t reserved_5_7:3;
+               uint64_t pulse_count_auto_clr:1;
+               uint64_t reserved_1_3:3;
+               uint64_t retry_enable:1;
+       } s;
+       struct cvmx_lmcx_retry_config_s cn73xx;
+       struct cvmx_lmcx_retry_config_s cn78xx;
+       struct cvmx_lmcx_retry_config_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_retry_status
+ *
+ * This register provides status on automatic retry operation.
+ *
+ */
+union cvmx_lmcx_retry_status {
+       u64 u64;
+       struct cvmx_lmcx_retry_status_s {
+               uint64_t clear_error_count:1;
+               uint64_t clear_error_pulse_count:1;
+               uint64_t reserved_57_61:5;
+               uint64_t error_pulse_count_valid:1;
+               uint64_t error_pulse_count_sat:1;
+               uint64_t reserved_52_54:3;
+               uint64_t error_pulse_count:4;
+               uint64_t reserved_45_47:3;
+               uint64_t error_sequence:5;
+               uint64_t reserved_33_39:7;
+               uint64_t error_type:1;
+               uint64_t reserved_24_31:8;
+               uint64_t error_count:24;
+       } s;
+       struct cvmx_lmcx_retry_status_s cn73xx;
+       struct cvmx_lmcx_retry_status_s cn78xx;
+       struct cvmx_lmcx_retry_status_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_rlevel_ctl
+ */
+union cvmx_lmcx_rlevel_ctl {
+       u64 u64;
+       struct cvmx_lmcx_rlevel_ctl_s {
+               uint64_t reserved_33_63:31;
+               uint64_t tccd_sel:1;
+               uint64_t pattern:8;
+               uint64_t reserved_22_23:2;
+               uint64_t delay_unload_3:1;
+               uint64_t delay_unload_2:1;
+               uint64_t delay_unload_1:1;
+               uint64_t delay_unload_0:1;
+               uint64_t bitmask:8;
+               uint64_t or_dis:1;
+               uint64_t offset_en:1;
+               uint64_t offset:4;
+               uint64_t byte:4;
+       } s;
+       struct cvmx_lmcx_rlevel_ctl_cn61xx {
+               uint64_t reserved_22_63:42;
+               uint64_t delay_unload_3:1;
+               uint64_t delay_unload_2:1;
+               uint64_t delay_unload_1:1;
+               uint64_t delay_unload_0:1;
+               uint64_t bitmask:8;
+               uint64_t or_dis:1;
+               uint64_t offset_en:1;
+               uint64_t offset:4;
+               uint64_t byte:4;
+       } cn61xx;
+       struct cvmx_lmcx_rlevel_ctl_cn61xx cn63xx;
+       struct cvmx_lmcx_rlevel_ctl_cn63xxp1 {
+               uint64_t reserved_9_63:55;
+               uint64_t offset_en:1;
+               uint64_t offset:4;
+               uint64_t byte:4;
+       } cn63xxp1;
+       struct cvmx_lmcx_rlevel_ctl_cn61xx cn66xx;
+       struct cvmx_lmcx_rlevel_ctl_cn61xx cn68xx;
+       struct cvmx_lmcx_rlevel_ctl_cn61xx cn68xxp1;
+       struct cvmx_lmcx_rlevel_ctl_cn70xx {
+               uint64_t reserved_32_63:32;
+               uint64_t pattern:8;
+               uint64_t reserved_22_23:2;
+               uint64_t delay_unload_3:1;
+               uint64_t delay_unload_2:1;
+               uint64_t delay_unload_1:1;
+               uint64_t delay_unload_0:1;
+               uint64_t bitmask:8;
+               uint64_t or_dis:1;
+               uint64_t offset_en:1;
+               uint64_t offset:4;
+               uint64_t byte:4;
+       } cn70xx;
+       struct cvmx_lmcx_rlevel_ctl_cn70xx cn70xxp1;
+       struct cvmx_lmcx_rlevel_ctl_cn70xx cn73xx;
+       struct cvmx_lmcx_rlevel_ctl_s cn78xx;
+       struct cvmx_lmcx_rlevel_ctl_s cn78xxp1;
+       struct cvmx_lmcx_rlevel_ctl_cn61xx cnf71xx;
+       struct cvmx_lmcx_rlevel_ctl_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_rlevel_dbg
+ *
+ * A given read of LMC()_RLEVEL_DBG returns the read leveling pass/fail
+ * results for all possible delay settings (i.e. the BITMASK) for only
+ * one byte in the last rank that the hardware ran read leveling on.
+ * LMC()_RLEVEL_CTL[BYTE] selects the particular byte. To get these
+ * pass/fail results for a different rank, you must run the hardware
+ * read leveling again. For example, it is possible to get the [BITMASK]
+ * results for every byte of every rank if you run read leveling separately
+ * for each rank, probing LMC()_RLEVEL_DBG between each read leveling.
+ */
+union cvmx_lmcx_rlevel_dbg {
+       u64 u64;
+       struct cvmx_lmcx_rlevel_dbg_s {
+               uint64_t bitmask:64;
+       } s;
+       struct cvmx_lmcx_rlevel_dbg_s cn61xx;
+       struct cvmx_lmcx_rlevel_dbg_s cn63xx;
+       struct cvmx_lmcx_rlevel_dbg_s cn63xxp1;
+       struct cvmx_lmcx_rlevel_dbg_s cn66xx;
+       struct cvmx_lmcx_rlevel_dbg_s cn68xx;
+       struct cvmx_lmcx_rlevel_dbg_s cn68xxp1;
+       struct cvmx_lmcx_rlevel_dbg_s cn70xx;
+       struct cvmx_lmcx_rlevel_dbg_s cn70xxp1;
+       struct cvmx_lmcx_rlevel_dbg_s cn73xx;
+       struct cvmx_lmcx_rlevel_dbg_s cn78xx;
+       struct cvmx_lmcx_rlevel_dbg_s cn78xxp1;
+       struct cvmx_lmcx_rlevel_dbg_s cnf71xx;
+       struct cvmx_lmcx_rlevel_dbg_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_rlevel_rank#
+ *
+ * Four of these CSRs exist per LMC, one for each rank. Read level setting
+ * is measured in units of 1/4 CK, so the BYTEn values can range over 16 CK
+ * cycles. Each CSR is written by hardware during a read leveling sequence
+ * for the rank. (Hardware sets [STATUS] to 3 after hardware read leveling
+ * completes for the rank.)
+ *
+ * If hardware is unable to find a match per LMC()_RLEVEL_CTL[OFFSET_EN] and
+ * LMC()_RLEVEL_CTL[OFFSET], then hardware sets
+ * LMC()_RLEVEL_RANK()[BYTEn<5:0>] to 0x0.
+ *
+ * Each CSR may also be written by software, but not while a read leveling
+ * sequence is in progress. (Hardware sets [STATUS] to 1 after a CSR write.)
+ * Software initiates a hardware read leveling sequence by programming
+ * LMC()_RLEVEL_CTL and writing [INIT_START] = 1 with [SEQ_SEL]=1.
+ * See LMC()_RLEVEL_CTL.
+ *
+ * LMC()_RLEVEL_RANKi values for ranks i without attached DRAM should be set
+ * such that they do not increase the range of possible BYTE values for any
+ * byte lane. The easiest way to do this is to set LMC()_RLEVEL_RANKi =
+ * LMC()_RLEVEL_RANKj, where j is some rank with attached DRAM whose
+ * LMC()_RLEVEL_RANKj is already fully initialized.
+ */
+union cvmx_lmcx_rlevel_rankx {
+       u64 u64;
+       struct cvmx_lmcx_rlevel_rankx_s {
+               uint64_t reserved_56_63:8;
+               uint64_t status:2;
+               uint64_t byte8:6;
+               uint64_t byte7:6;
+               uint64_t byte6:6;
+               uint64_t byte5:6;
+               uint64_t byte4:6;
+               uint64_t byte3:6;
+               uint64_t byte2:6;
+               uint64_t byte1:6;
+               uint64_t byte0:6;
+       } s;
+       struct cvmx_lmcx_rlevel_rankx_s cn61xx;
+       struct cvmx_lmcx_rlevel_rankx_s cn63xx;
+       struct cvmx_lmcx_rlevel_rankx_s cn63xxp1;
+       struct cvmx_lmcx_rlevel_rankx_s cn66xx;
+       struct cvmx_lmcx_rlevel_rankx_s cn68xx;
+       struct cvmx_lmcx_rlevel_rankx_s cn68xxp1;
+       struct cvmx_lmcx_rlevel_rankx_s cn70xx;
+       struct cvmx_lmcx_rlevel_rankx_s cn70xxp1;
+       struct cvmx_lmcx_rlevel_rankx_s cn73xx;
+       struct cvmx_lmcx_rlevel_rankx_s cn78xx;
+       struct cvmx_lmcx_rlevel_rankx_s cn78xxp1;
+       struct cvmx_lmcx_rlevel_rankx_s cnf71xx;
+       struct cvmx_lmcx_rlevel_rankx_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_rodt_comp_ctl
+ *
+ * LMC_RODT_COMP_CTL = LMC Compensation control
+ *
+ */
+union cvmx_lmcx_rodt_comp_ctl {
+       u64 u64;
+       struct cvmx_lmcx_rodt_comp_ctl_s {
+               uint64_t reserved_17_63:47;
+               uint64_t enable:1;
+               uint64_t reserved_12_15:4;
+               uint64_t nctl:4;
+               uint64_t reserved_5_7:3;
+               uint64_t pctl:5;
+       } s;
+       struct cvmx_lmcx_rodt_comp_ctl_s cn50xx;
+       struct cvmx_lmcx_rodt_comp_ctl_s cn52xx;
+       struct cvmx_lmcx_rodt_comp_ctl_s cn52xxp1;
+       struct cvmx_lmcx_rodt_comp_ctl_s cn56xx;
+       struct cvmx_lmcx_rodt_comp_ctl_s cn56xxp1;
+       struct cvmx_lmcx_rodt_comp_ctl_s cn58xx;
+       struct cvmx_lmcx_rodt_comp_ctl_s cn58xxp1;
+};
+
+/**
+ * cvmx_lmc#_rodt_ctl
+ *
+ * LMC_RODT_CTL = Obsolete LMC Read OnDieTermination control
+ * See the description in LMC_WODT_CTL1. On reads, Octeon only supports
+ * turning on ODTs in the lower 2 DIMMs with the masks as below.
+ *
+ * Notes:
+ * When a given RANK in position N is selected, the RODT _HI and _LO masks
+ * for that position are used.
+ * Mask[3:0] is used for RODT control of the RANKs in positions 3, 2, 1,
+ * and 0, respectively.
+ * In  64b mode, DIMMs are assumed to be ordered in the following order:
+ *  position 3: [unused        , DIMM1_RANK1_LO]
+ *  position 2: [unused        , DIMM1_RANK0_LO]
+ *  position 1: [unused        , DIMM0_RANK1_LO]
+ *  position 0: [unused        , DIMM0_RANK0_LO]
+ * In 128b mode, DIMMs are assumed to be ordered in the following order:
+ *  position 3: [DIMM3_RANK1_HI, DIMM1_RANK1_LO]
+ *  position 2: [DIMM3_RANK0_HI, DIMM1_RANK0_LO]
+ *  position 1: [DIMM2_RANK1_HI, DIMM0_RANK1_LO]
+ *  position 0: [DIMM2_RANK0_HI, DIMM0_RANK0_LO]
+ */
+union cvmx_lmcx_rodt_ctl {
+       u64 u64;
+       struct cvmx_lmcx_rodt_ctl_s {
+               uint64_t reserved_32_63:32;
+               uint64_t rodt_hi3:4;
+               uint64_t rodt_hi2:4;
+               uint64_t rodt_hi1:4;
+               uint64_t rodt_hi0:4;
+               uint64_t rodt_lo3:4;
+               uint64_t rodt_lo2:4;
+               uint64_t rodt_lo1:4;
+               uint64_t rodt_lo0:4;
+       } s;
+       struct cvmx_lmcx_rodt_ctl_s cn30xx;
+       struct cvmx_lmcx_rodt_ctl_s cn31xx;
+       struct cvmx_lmcx_rodt_ctl_s cn38xx;
+       struct cvmx_lmcx_rodt_ctl_s cn38xxp2;
+       struct cvmx_lmcx_rodt_ctl_s cn50xx;
+       struct cvmx_lmcx_rodt_ctl_s cn52xx;
+       struct cvmx_lmcx_rodt_ctl_s cn52xxp1;
+       struct cvmx_lmcx_rodt_ctl_s cn56xx;
+       struct cvmx_lmcx_rodt_ctl_s cn56xxp1;
+       struct cvmx_lmcx_rodt_ctl_s cn58xx;
+       struct cvmx_lmcx_rodt_ctl_s cn58xxp1;
+};
+
+/**
+ * cvmx_lmc#_rodt_mask
+ *
+ * System designers may desire to terminate DQ/DQS lines for higher frequency
+ * DDR operations, especially on a multirank system. DDR3 DQ/DQS I/Os have
+ * built-in termination resistors that can be turned on or off by the
+ * controller, after meeting TAOND and TAOF timing requirements.
+ *
+ * Each rank has its own ODT pin that fans out to all the memory parts in
+ * that DIMM. System designers may prefer different combinations of ODT ONs
+ * for read operations into different ranks. CNXXXX supports full
+ * programmability by way of the mask register below. Each rank position has
+ * its own 4-bit programmable field. When the controller does a read to that
+ * rank, it sets the 4 ODT pins to the MASK pins below. For example, when
+ * doing a read from Rank0, a system designer may desire to terminate the
+ * lines with the resistor on DIMM0/Rank1. The mask [RODT_D0_R0] would then
+ * be [0010].
+ *
+ * CNXXXX drives the appropriate mask values on the ODT pins by default.
+ * If this feature is not required, write 0x0 in this register. Note that,
+ * as per the JEDEC DDR3 specifications, the ODT pin for the rank that is
+ * being read should always be 0x0. When a given RANK is selected, the RODT
+ * mask for that rank is used. The resulting RODT mask is driven to the
+ * DIMMs in the following manner:
+ */
+union cvmx_lmcx_rodt_mask {
+       u64 u64;
+       struct cvmx_lmcx_rodt_mask_s {
+               uint64_t rodt_d3_r1:8;
+               uint64_t rodt_d3_r0:8;
+               uint64_t rodt_d2_r1:8;
+               uint64_t rodt_d2_r0:8;
+               uint64_t rodt_d1_r1:8;
+               uint64_t rodt_d1_r0:8;
+               uint64_t rodt_d0_r1:8;
+               uint64_t rodt_d0_r0:8;
+       } s;
+       struct cvmx_lmcx_rodt_mask_s cn61xx;
+       struct cvmx_lmcx_rodt_mask_s cn63xx;
+       struct cvmx_lmcx_rodt_mask_s cn63xxp1;
+       struct cvmx_lmcx_rodt_mask_s cn66xx;
+       struct cvmx_lmcx_rodt_mask_s cn68xx;
+       struct cvmx_lmcx_rodt_mask_s cn68xxp1;
+       struct cvmx_lmcx_rodt_mask_cn70xx {
+               uint64_t reserved_28_63:36;
+               uint64_t rodt_d1_r1:4;
+               uint64_t reserved_20_23:4;
+               uint64_t rodt_d1_r0:4;
+               uint64_t reserved_12_15:4;
+               uint64_t rodt_d0_r1:4;
+               uint64_t reserved_4_7:4;
+               uint64_t rodt_d0_r0:4;
+       } cn70xx;
+       struct cvmx_lmcx_rodt_mask_cn70xx cn70xxp1;
+       struct cvmx_lmcx_rodt_mask_cn70xx cn73xx;
+       struct cvmx_lmcx_rodt_mask_cn70xx cn78xx;
+       struct cvmx_lmcx_rodt_mask_cn70xx cn78xxp1;
+       struct cvmx_lmcx_rodt_mask_s cnf71xx;
+       struct cvmx_lmcx_rodt_mask_cn70xx cnf75xx;
+};
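+
+/*
+ * Illustrative sketch, not part of this patch: build the RODT mask for the
+ * example in the note above (reads from rank 0 terminated by the resistor
+ * on DIMM0/Rank1, i.e. [RODT_D0_R0] = 0010). Writing the value to the
+ * LMC()_RODT_MASK CSR is left to the platform's CSR accessor.
+ */
+static inline uint64_t lmcx_rodt_mask_example(void)
+{
+       union cvmx_lmcx_rodt_mask mask;
+
+       mask.u64 = 0;
+       mask.cn73xx.rodt_d0_r0 = 0x2;   /* terminate on DIMM0/Rank1 for rank-0 reads */
+       return mask.u64;
+}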
+
+/**
+ * cvmx_lmc#_scramble_cfg0
+ *
+ * LMC_SCRAMBLE_CFG0 = LMC Scramble Config0
+ *
+ */
+union cvmx_lmcx_scramble_cfg0 {
+       u64 u64;
+       struct cvmx_lmcx_scramble_cfg0_s {
+               uint64_t key:64;
+       } s;
+       struct cvmx_lmcx_scramble_cfg0_s cn61xx;
+       struct cvmx_lmcx_scramble_cfg0_s cn66xx;
+       struct cvmx_lmcx_scramble_cfg0_s cn70xx;
+       struct cvmx_lmcx_scramble_cfg0_s cn70xxp1;
+       struct cvmx_lmcx_scramble_cfg0_s cn73xx;
+       struct cvmx_lmcx_scramble_cfg0_s cn78xx;
+       struct cvmx_lmcx_scramble_cfg0_s cn78xxp1;
+       struct cvmx_lmcx_scramble_cfg0_s cnf71xx;
+       struct cvmx_lmcx_scramble_cfg0_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_scramble_cfg1
+ *
+ * These registers set the aliasing that uses the lowest, legal chip select(s).
+ *
+ */
+union cvmx_lmcx_scramble_cfg1 {
+       u64 u64;
+       struct cvmx_lmcx_scramble_cfg1_s {
+               uint64_t key:64;
+       } s;
+       struct cvmx_lmcx_scramble_cfg1_s cn61xx;
+       struct cvmx_lmcx_scramble_cfg1_s cn66xx;
+       struct cvmx_lmcx_scramble_cfg1_s cn70xx;
+       struct cvmx_lmcx_scramble_cfg1_s cn70xxp1;
+       struct cvmx_lmcx_scramble_cfg1_s cn73xx;
+       struct cvmx_lmcx_scramble_cfg1_s cn78xx;
+       struct cvmx_lmcx_scramble_cfg1_s cn78xxp1;
+       struct cvmx_lmcx_scramble_cfg1_s cnf71xx;
+       struct cvmx_lmcx_scramble_cfg1_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_scramble_cfg2
+ */
+union cvmx_lmcx_scramble_cfg2 {
+       u64 u64;
+       struct cvmx_lmcx_scramble_cfg2_s {
+               uint64_t key:64;
+       } s;
+       struct cvmx_lmcx_scramble_cfg2_s cn73xx;
+       struct cvmx_lmcx_scramble_cfg2_s cn78xx;
+       struct cvmx_lmcx_scramble_cfg2_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_scrambled_fadr
+ *
+ * LMC()_FADR captures the failing pre-scrambled address location (split into
+ * DIMM, bunk, bank, etc). If scrambling is off, LMC()_FADR also captures the
+ * failing physical location in the DRAM parts. LMC()_SCRAMBLED_FADR captures
+ * the actual failing address location in the physical DRAM parts, i.e.:
+ *
+ * * If scrambling is on, LMC()_SCRAMBLED_FADR contains the failing physical
+ * location in the
+ * DRAM parts (split into DIMM, bunk, bank, etc).
+ *
+ * * If scrambling is off, the pre-scramble and post-scramble addresses are
+ * the same, and so the
+ * contents of LMC()_SCRAMBLED_FADR match the contents of LMC()_FADR.
+ *
+ * This register only captures the first transaction with ECC errors. A DED
+ * error can over-write this register with its failing addresses if the first
+ * error was a SEC. If you write LMC()_CONFIG -> SEC_ERR/DED_ERR, it clears
+ * the error bits and captures the next failing address. If [FDIMM] is 1,
+ * that means the error is in the higher DIMM.
+ */
+union cvmx_lmcx_scrambled_fadr {
+       u64 u64;
+       struct cvmx_lmcx_scrambled_fadr_s {
+               uint64_t reserved_43_63:21;
+               uint64_t fcid:3;
+               uint64_t fill_order:2;
+               uint64_t reserved_14_37:24;
+               uint64_t fcol:14;
+       } s;
+       struct cvmx_lmcx_scrambled_fadr_cn61xx {
+               uint64_t reserved_36_63:28;
+               uint64_t fdimm:2;
+               uint64_t fbunk:1;
+               uint64_t fbank:3;
+               uint64_t frow:16;
+               uint64_t fcol:14;
+       } cn61xx;
+       struct cvmx_lmcx_scrambled_fadr_cn61xx cn66xx;
+       struct cvmx_lmcx_scrambled_fadr_cn70xx {
+               uint64_t reserved_40_63:24;
+               uint64_t fill_order:2;
+               uint64_t fdimm:1;
+               uint64_t fbunk:1;
+               uint64_t fbank:4;
+               uint64_t frow:18;
+               uint64_t fcol:14;
+       } cn70xx;
+       struct cvmx_lmcx_scrambled_fadr_cn70xx cn70xxp1;
+       struct cvmx_lmcx_scrambled_fadr_cn73xx {
+               uint64_t reserved_43_63:21;
+               uint64_t fcid:3;
+               uint64_t fill_order:2;
+               uint64_t fdimm:1;
+               uint64_t fbunk:1;
+               uint64_t fbank:4;
+               uint64_t frow:18;
+               uint64_t fcol:14;
+       } cn73xx;
+       struct cvmx_lmcx_scrambled_fadr_cn73xx cn78xx;
+       struct cvmx_lmcx_scrambled_fadr_cn73xx cn78xxp1;
+       struct cvmx_lmcx_scrambled_fadr_cn61xx cnf71xx;
+       struct cvmx_lmcx_scrambled_fadr_cn73xx cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_seq_ctl
+ *
+ * This register is used to initiate the various control sequences in the LMC.
+ *
+ */
+union cvmx_lmcx_seq_ctl {
+       u64 u64;
+       struct cvmx_lmcx_seq_ctl_s {
+               uint64_t reserved_6_63:58;
+               uint64_t seq_complete:1;
+               uint64_t seq_sel:4;
+               uint64_t init_start:1;
+       } s;
+       struct cvmx_lmcx_seq_ctl_s cn70xx;
+       struct cvmx_lmcx_seq_ctl_s cn70xxp1;
+       struct cvmx_lmcx_seq_ctl_s cn73xx;
+       struct cvmx_lmcx_seq_ctl_s cn78xx;
+       struct cvmx_lmcx_seq_ctl_s cn78xxp1;
+       struct cvmx_lmcx_seq_ctl_s cnf75xx;
+};
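+
+/*
+ * Illustrative sketch, not part of this patch: compose a SEQ_CTL value that
+ * starts one of the LMC control sequences, and test a read-back value for
+ * completion. The SEQ_SEL encodings themselves are not restated here;
+ * helper names are assumptions made for illustration only.
+ */
+static inline uint64_t lmcx_seq_ctl_start(unsigned int seq_sel)
+{
+       union cvmx_lmcx_seq_ctl seq;
+
+       seq.u64 = 0;
+       seq.s.seq_sel = seq_sel;        /* sequence to run */
+       seq.s.init_start = 1;           /* kick off the sequence */
+       return seq.u64;
+}
+
+static inline int lmcx_seq_ctl_complete(uint64_t csr_value)
+{
+       union cvmx_lmcx_seq_ctl seq;
+
+       seq.u64 = csr_value;
+       return seq.s.seq_complete;
+}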
+
+/**
+ * cvmx_lmc#_slot_ctl0
+ *
+ * This register is an assortment of control fields needed by the memory
+ * controller. If software has not previously written to this register
+ * (since the last DRESET), hardware updates the fields in this register to
+ * the minimum allowed value when any of LMC()_RLEVEL_RANK(),
+ * LMC()_WLEVEL_RANK(), LMC()_CONTROL, and LMC()_MODEREG_PARAMS0 registers
+ * change. Ideally, only read this register after LMC has been initialized and
+ * LMC()_RLEVEL_RANK(), LMC()_WLEVEL_RANK() have valid data.
+ *
+ * The interpretation of the fields in this register depends on
+ * LMC(0)_CONFIG[DDR2T]:
+ *
+ * * If LMC()_CONFIG[DDR2T]=1, (FieldValue + 4) is the minimum CK cycles
+ * between when the DRAM part registers CAS commands of the first and
+ * second types from different cache blocks.
+ *
+ * * If LMC()_CONFIG[DDR2T]=0, (FieldValue + 3) is the minimum CK cycles
+ * between when the DRAM part registers CAS commands of the first and second
+ * types from different cache blocks.
+ * FieldValue = 0 is always illegal in this case.
+ * The hardware-calculated minimums for these fields are shown in
+ * LMC(0)_SLOT_CTL0 Hardware-Calculated Minimums.
+ */
+union cvmx_lmcx_slot_ctl0 {
+       u64 u64;
+       struct cvmx_lmcx_slot_ctl0_s {
+               uint64_t reserved_50_63:14;
+               uint64_t w2r_l_init_ext:1;
+               uint64_t w2r_init_ext:1;
+               uint64_t w2w_l_init:6;
+               uint64_t w2r_l_init:6;
+               uint64_t r2w_l_init:6;
+               uint64_t r2r_l_init:6;
+               uint64_t w2w_init:6;
+               uint64_t w2r_init:6;
+               uint64_t r2w_init:6;
+               uint64_t r2r_init:6;
+       } s;
+       struct cvmx_lmcx_slot_ctl0_cn61xx {
+               uint64_t reserved_24_63:40;
+               uint64_t w2w_init:6;
+               uint64_t w2r_init:6;
+               uint64_t r2w_init:6;
+               uint64_t r2r_init:6;
+       } cn61xx;
+       struct cvmx_lmcx_slot_ctl0_cn61xx cn63xx;
+       struct cvmx_lmcx_slot_ctl0_cn61xx cn63xxp1;
+       struct cvmx_lmcx_slot_ctl0_cn61xx cn66xx;
+       struct cvmx_lmcx_slot_ctl0_cn61xx cn68xx;
+       struct cvmx_lmcx_slot_ctl0_cn61xx cn68xxp1;
+       struct cvmx_lmcx_slot_ctl0_cn70xx {
+               uint64_t reserved_48_63:16;
+               uint64_t w2w_l_init:6;
+               uint64_t w2r_l_init:6;
+               uint64_t r2w_l_init:6;
+               uint64_t r2r_l_init:6;
+               uint64_t w2w_init:6;
+               uint64_t w2r_init:6;
+               uint64_t r2w_init:6;
+               uint64_t r2r_init:6;
+       } cn70xx;
+       struct cvmx_lmcx_slot_ctl0_cn70xx cn70xxp1;
+       struct cvmx_lmcx_slot_ctl0_s cn73xx;
+       struct cvmx_lmcx_slot_ctl0_s cn78xx;
+       struct cvmx_lmcx_slot_ctl0_s cn78xxp1;
+       struct cvmx_lmcx_slot_ctl0_cn61xx cnf71xx;
+       struct cvmx_lmcx_slot_ctl0_s cnf75xx;
+};
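+
+/*
+ * Illustrative sketch, not part of this patch: translate a SLOT_CTL0 field
+ * value into the minimum CK-cycle gap it encodes, following the
+ * DDR2T-dependent interpretation in the note above.
+ */
+static inline int lmcx_slot_ctl_min_ck(unsigned int field_value, int ddr2t)
+{
+       if (ddr2t)
+               return field_value + 4;
+
+       /* With LMC()_CONFIG[DDR2T] = 0, a field value of 0 is illegal */
+       if (!field_value)
+               return -1;
+
+       return field_value + 3;
+}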
+
+/**
+ * cvmx_lmc#_slot_ctl1
+ *
+ * This register is an assortment of control fields needed by the memory
+ * controller. If software has not previously written to this register
+ * (since the last DRESET), hardware updates the fields in this register to
+ * the minimum allowed value when any of LMC()_RLEVEL_RANK(),
+ * LMC()_WLEVEL_RANK(), LMC()_CONTROL and LMC()_MODEREG_PARAMS0 change.
+ * Ideally, only read this register after LMC has been initialized and
+ * LMC()_RLEVEL_RANK(), LMC()_WLEVEL_RANK() have valid data.
+ *
+ * The interpretation of the fields in this CSR depends on
+ * LMC(0)_CONFIG[DDR2T]:
+ *
+ * * If LMC()_CONFIG[DDR2T]=1, (FieldValue + 4) is the minimum CK cycles
+ * between when the DRAM part registers CAS commands of the first and
+ * second types from different cache blocks.
+ *
+ * * If LMC()_CONFIG[DDR2T]=0, (FieldValue + 3) is the minimum CK cycles
+ * between when the DRAM part registers CAS commands of the first and
+ * second types from different cache blocks.
+ * FieldValue = 0 is always illegal in this case.
+ *
+ * The hardware-calculated minimums for these fields are shown in
+ * LMC(0)_SLOT_CTL1 Hardware-Calculated Minimums.
+ */
+union cvmx_lmcx_slot_ctl1 {
+       u64 u64;
+       struct cvmx_lmcx_slot_ctl1_s {
+               uint64_t reserved_24_63:40;
+               uint64_t w2w_xrank_init:6;
+               uint64_t w2r_xrank_init:6;
+               uint64_t r2w_xrank_init:6;
+               uint64_t r2r_xrank_init:6;
+       } s;
+       struct cvmx_lmcx_slot_ctl1_s cn61xx;
+       struct cvmx_lmcx_slot_ctl1_s cn63xx;
+       struct cvmx_lmcx_slot_ctl1_s cn63xxp1;
+       struct cvmx_lmcx_slot_ctl1_s cn66xx;
+       struct cvmx_lmcx_slot_ctl1_s cn68xx;
+       struct cvmx_lmcx_slot_ctl1_s cn68xxp1;
+       struct cvmx_lmcx_slot_ctl1_s cn70xx;
+       struct cvmx_lmcx_slot_ctl1_s cn70xxp1;
+       struct cvmx_lmcx_slot_ctl1_s cn73xx;
+       struct cvmx_lmcx_slot_ctl1_s cn78xx;
+       struct cvmx_lmcx_slot_ctl1_s cn78xxp1;
+       struct cvmx_lmcx_slot_ctl1_s cnf71xx;
+       struct cvmx_lmcx_slot_ctl1_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_slot_ctl2
+ *
+ * This register is an assortment of control fields needed by the memory
+ * controller. If software has not previously written to this register
+ * (since the last DRESET), hardware updates the fields in this register
+ * to the minimum allowed value when any of LMC()_RLEVEL_RANK(),
+ * LMC()_WLEVEL_RANK(), LMC()_CONTROL and LMC()_MODEREG_PARAMS0 change.
+ * Ideally, only read this register after LMC has been initialized and
+ * LMC()_RLEVEL_RANK(), LMC()_WLEVEL_RANK() have valid data.
+ *
+ * The interpretation of the fields in this CSR depends on LMC(0)_CONFIG[DDR2T]:
+ *
+ * * If LMC()_CONFIG[DDR2T] = 1, (FieldValue + 4) is the minimum CK cycles
+ * between when the DRAM part registers CAS commands of the first and
+ * second types from different cache blocks.
+ *
+ * * If LMC()_CONFIG[DDR2T] = 0, (FieldValue + 3) is the minimum CK cycles
+ * between when the DRAM part registers CAS commands of the first and second
+ * types from different cache blocks.
+ * FieldValue = 0 is always illegal in this case.
+ *
+ * The hardware-calculated minimums for these fields are shown in LMC Registers.
+ */
+union cvmx_lmcx_slot_ctl2 {
+       u64 u64;
+       struct cvmx_lmcx_slot_ctl2_s {
+               uint64_t reserved_24_63:40;
+               uint64_t w2w_xdimm_init:6;
+               uint64_t w2r_xdimm_init:6;
+               uint64_t r2w_xdimm_init:6;
+               uint64_t r2r_xdimm_init:6;
+       } s;
+       struct cvmx_lmcx_slot_ctl2_s cn61xx;
+       struct cvmx_lmcx_slot_ctl2_s cn63xx;
+       struct cvmx_lmcx_slot_ctl2_s cn63xxp1;
+       struct cvmx_lmcx_slot_ctl2_s cn66xx;
+       struct cvmx_lmcx_slot_ctl2_s cn68xx;
+       struct cvmx_lmcx_slot_ctl2_s cn68xxp1;
+       struct cvmx_lmcx_slot_ctl2_s cn70xx;
+       struct cvmx_lmcx_slot_ctl2_s cn70xxp1;
+       struct cvmx_lmcx_slot_ctl2_s cn73xx;
+       struct cvmx_lmcx_slot_ctl2_s cn78xx;
+       struct cvmx_lmcx_slot_ctl2_s cn78xxp1;
+       struct cvmx_lmcx_slot_ctl2_s cnf71xx;
+       struct cvmx_lmcx_slot_ctl2_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_slot_ctl3
+ *
+ * This register is an assortment of control fields needed by the memory
+ * controller. If software has not previously written to this register
+ * (since the last DRESET), hardware updates the fields in this register
+ * to the minimum allowed value when any of LMC()_RLEVEL_RANK(),
+ * LMC()_WLEVEL_RANK(), LMC()_CONTROL and LMC()_MODEREG_PARAMS0 change.
+ * Ideally, only read this register after LMC has been initialized and
+ * LMC()_RLEVEL_RANK(), LMC()_WLEVEL_RANK() have valid data.
+ *
+ * The interpretation of the fields in this CSR depends on LMC(0)_CONFIG[DDR2T]:
+ *
+ * * If LMC()_CONFIG[DDR2T] = 1, (FieldValue + 4) is the minimum CK cycles
+ * between when the DRAM part registers CAS commands of the first and
+ * second types from different cache blocks.
+ *
+ * * If LMC()_CONFIG[DDR2T] = 0, (FieldValue + 3) is the minimum CK cycles
+ * between when the DRAM part registers CAS commands of the first and second
+ * types from different cache blocks.
+ * FieldValue = 0 is always illegal in this case.
+ *
+ * The hardware-calculated minimums for these fields are shown in LMC Registers.
+ */
+union cvmx_lmcx_slot_ctl3 {
+       u64 u64;
+       struct cvmx_lmcx_slot_ctl3_s {
+               uint64_t reserved_50_63:14;
+               uint64_t w2r_l_xrank_init_ext:1;
+               uint64_t w2r_xrank_init_ext:1;
+               uint64_t w2w_l_xrank_init:6;
+               uint64_t w2r_l_xrank_init:6;
+               uint64_t r2w_l_xrank_init:6;
+               uint64_t r2r_l_xrank_init:6;
+               uint64_t w2w_xrank_init:6;
+               uint64_t w2r_xrank_init:6;
+               uint64_t r2w_xrank_init:6;
+               uint64_t r2r_xrank_init:6;
+       } s;
+       struct cvmx_lmcx_slot_ctl3_s cn73xx;
+       struct cvmx_lmcx_slot_ctl3_s cn78xx;
+       struct cvmx_lmcx_slot_ctl3_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_timing_params0
+ */
+union cvmx_lmcx_timing_params0 {
+       u64 u64;
+       struct cvmx_lmcx_timing_params0_s {
+               uint64_t reserved_54_63:10;
+               uint64_t tbcw:6;
+               uint64_t reserved_26_47:22;
+               uint64_t tmrd:4;
+               uint64_t reserved_8_21:14;
+               uint64_t tckeon:8;
+       } s;
+       struct cvmx_lmcx_timing_params0_cn61xx {
+               uint64_t reserved_47_63:17;
+               uint64_t trp_ext:1;
+               uint64_t tcksre:4;
+               uint64_t trp:4;
+               uint64_t tzqinit:4;
+               uint64_t tdllk:4;
+               uint64_t tmod:4;
+               uint64_t tmrd:4;
+               uint64_t txpr:4;
+               uint64_t tcke:4;
+               uint64_t tzqcs:4;
+               uint64_t reserved_0_9:10;
+       } cn61xx;
+       struct cvmx_lmcx_timing_params0_cn61xx cn63xx;
+       struct cvmx_lmcx_timing_params0_cn63xxp1 {
+               uint64_t reserved_46_63:18;
+               uint64_t tcksre:4;
+               uint64_t trp:4;
+               uint64_t tzqinit:4;
+               uint64_t tdllk:4;
+               uint64_t tmod:4;
+               uint64_t tmrd:4;
+               uint64_t txpr:4;
+               uint64_t tcke:4;
+               uint64_t tzqcs:4;
+               uint64_t tckeon:10;
+       } cn63xxp1;
+       struct cvmx_lmcx_timing_params0_cn61xx cn66xx;
+       struct cvmx_lmcx_timing_params0_cn61xx cn68xx;
+       struct cvmx_lmcx_timing_params0_cn61xx cn68xxp1;
+       struct cvmx_lmcx_timing_params0_cn70xx {
+               uint64_t reserved_48_63:16;
+               uint64_t tcksre:4;
+               uint64_t trp:5;
+               uint64_t tzqinit:4;
+               uint64_t tdllk:4;
+               uint64_t tmod:5;
+               uint64_t tmrd:4;
+               uint64_t txpr:6;
+               uint64_t tcke:4;
+               uint64_t tzqcs:4;
+               uint64_t reserved_0_7:8;
+       } cn70xx;
+       struct cvmx_lmcx_timing_params0_cn70xx cn70xxp1;
+       struct cvmx_lmcx_timing_params0_cn73xx {
+               uint64_t reserved_54_63:10;
+               uint64_t tbcw:6;
+               uint64_t tcksre:4;
+               uint64_t trp:5;
+               uint64_t tzqinit:4;
+               uint64_t tdllk:4;
+               uint64_t tmod:5;
+               uint64_t tmrd:4;
+               uint64_t txpr:6;
+               uint64_t tcke:4;
+               uint64_t tzqcs:4;
+               uint64_t reserved_0_7:8;
+       } cn73xx;
+       struct cvmx_lmcx_timing_params0_cn73xx cn78xx;
+       struct cvmx_lmcx_timing_params0_cn73xx cn78xxp1;
+       struct cvmx_lmcx_timing_params0_cn61xx cnf71xx;
+       struct cvmx_lmcx_timing_params0_cn73xx cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_timing_params1
+ */
+union cvmx_lmcx_timing_params1 {
+       u64 u64;
+       struct cvmx_lmcx_timing_params1_s {
+               uint64_t reserved_59_63:5;
+               uint64_t txp_ext:1;
+               uint64_t trcd_ext:1;
+               uint64_t tpdm_full_cycle_ena:1;
+               uint64_t trfc_dlr:7;
+               uint64_t reserved_4_48:45;
+               uint64_t tmprr:4;
+       } s;
+       struct cvmx_lmcx_timing_params1_cn61xx {
+               uint64_t reserved_47_63:17;
+               uint64_t tras_ext:1;
+               uint64_t txpdll:5;
+               uint64_t tfaw:5;
+               uint64_t twldqsen:4;
+               uint64_t twlmrd:4;
+               uint64_t txp:3;
+               uint64_t trrd:3;
+               uint64_t trfc:5;
+               uint64_t twtr:4;
+               uint64_t trcd:4;
+               uint64_t tras:5;
+               uint64_t tmprr:4;
+       } cn61xx;
+       struct cvmx_lmcx_timing_params1_cn61xx cn63xx;
+       struct cvmx_lmcx_timing_params1_cn63xxp1 {
+               uint64_t reserved_46_63:18;
+               uint64_t txpdll:5;
+               uint64_t tfaw:5;
+               uint64_t twldqsen:4;
+               uint64_t twlmrd:4;
+               uint64_t txp:3;
+               uint64_t trrd:3;
+               uint64_t trfc:5;
+               uint64_t twtr:4;
+               uint64_t trcd:4;
+               uint64_t tras:5;
+               uint64_t tmprr:4;
+       } cn63xxp1;
+       struct cvmx_lmcx_timing_params1_cn61xx cn66xx;
+       struct cvmx_lmcx_timing_params1_cn61xx cn68xx;
+       struct cvmx_lmcx_timing_params1_cn61xx cn68xxp1;
+       struct cvmx_lmcx_timing_params1_cn70xx {
+               uint64_t reserved_49_63:15;
+               uint64_t txpdll:5;
+               uint64_t tfaw:5;
+               uint64_t twldqsen:4;
+               uint64_t twlmrd:4;
+               uint64_t txp:3;
+               uint64_t trrd:3;
+               uint64_t trfc:7;
+               uint64_t twtr:4;
+               uint64_t trcd:4;
+               uint64_t tras:6;
+               uint64_t tmprr:4;
+       } cn70xx;
+       struct cvmx_lmcx_timing_params1_cn70xx cn70xxp1;
+       struct cvmx_lmcx_timing_params1_cn73xx {
+               uint64_t reserved_59_63:5;
+               uint64_t txp_ext:1;
+               uint64_t trcd_ext:1;
+               uint64_t tpdm_full_cycle_ena:1;
+               uint64_t trfc_dlr:7;
+               uint64_t txpdll:5;
+               uint64_t tfaw:5;
+               uint64_t twldqsen:4;
+               uint64_t twlmrd:4;
+               uint64_t txp:3;
+               uint64_t trrd:3;
+               uint64_t trfc:7;
+               uint64_t twtr:4;
+               uint64_t trcd:4;
+               uint64_t tras:6;
+               uint64_t tmprr:4;
+       } cn73xx;
+       struct cvmx_lmcx_timing_params1_cn73xx cn78xx;
+       struct cvmx_lmcx_timing_params1_cn73xx cn78xxp1;
+       struct cvmx_lmcx_timing_params1_cn61xx cnf71xx;
+       struct cvmx_lmcx_timing_params1_cn73xx cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_timing_params2
+ *
+ * This register sets timing parameters for DDR4.
+ *
+ */
+union cvmx_lmcx_timing_params2 {
+       u64 u64;
+       struct cvmx_lmcx_timing_params2_s {
+               uint64_t reserved_16_63:48;
+               uint64_t trrd_l_ext:1;
+               uint64_t trtp:4;
+               uint64_t t_rw_op_max:4;
+               uint64_t twtr_l:4;
+               uint64_t trrd_l:3;
+       } s;
+       struct cvmx_lmcx_timing_params2_cn70xx {
+               uint64_t reserved_15_63:49;
+               uint64_t trtp:4;
+               uint64_t t_rw_op_max:4;
+               uint64_t twtr_l:4;
+               uint64_t trrd_l:3;
+       } cn70xx;
+       struct cvmx_lmcx_timing_params2_cn70xx cn70xxp1;
+       struct cvmx_lmcx_timing_params2_s cn73xx;
+       struct cvmx_lmcx_timing_params2_s cn78xx;
+       struct cvmx_lmcx_timing_params2_s cn78xxp1;
+       struct cvmx_lmcx_timing_params2_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_tro_ctl
+ *
+ * LMC_TRO_CTL = LMC Temperature Ring Osc Control
+ * This register is an assortment of various control fields needed to
+ * control the temperature ring oscillator
+ *
+ * Notes:
+ * To bring up the temperature ring oscillator, write TRESET to 0, then
+ * initialize RCLK_CNT to the desired value.
+ */
+union cvmx_lmcx_tro_ctl {
+       u64 u64;
+       struct cvmx_lmcx_tro_ctl_s {
+               uint64_t reserved_33_63:31;
+               uint64_t rclk_cnt:32;
+               uint64_t treset:1;
+       } s;
+       struct cvmx_lmcx_tro_ctl_s cn61xx;
+       struct cvmx_lmcx_tro_ctl_s cn63xx;
+       struct cvmx_lmcx_tro_ctl_s cn63xxp1;
+       struct cvmx_lmcx_tro_ctl_s cn66xx;
+       struct cvmx_lmcx_tro_ctl_s cn68xx;
+       struct cvmx_lmcx_tro_ctl_s cn68xxp1;
+       struct cvmx_lmcx_tro_ctl_s cnf71xx;
+};
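+
+/*
+ * Illustrative sketch, not part of this patch: compose the TRO_CTL value
+ * implied by the bring-up order in the note above (TRESET cleared, then
+ * RCLK_CNT programmed). Writing the value to the CSR is left to the
+ * platform's CSR accessor.
+ */
+static inline uint64_t lmcx_tro_ctl_value(unsigned int rclk_cnt)
+{
+       union cvmx_lmcx_tro_ctl tro;
+
+       tro.u64 = 0;
+       tro.s.treset = 0;               /* take the ring oscillator out of reset */
+       tro.s.rclk_cnt = rclk_cnt;      /* desired reference clock count */
+       return tro.u64;
+}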
+
+/**
+ * cvmx_lmc#_tro_stat
+ *
+ * LMC_TRO_STAT = LMC Temperature Ring Osc Status
+ * This register reports the status (ring count) of the temperature ring
+ * oscillator controlled by LMC_TRO_CTL.
+ */
+union cvmx_lmcx_tro_stat {
+       u64 u64;
+       struct cvmx_lmcx_tro_stat_s {
+               uint64_t reserved_32_63:32;
+               uint64_t ring_cnt:32;
+       } s;
+       struct cvmx_lmcx_tro_stat_s cn61xx;
+       struct cvmx_lmcx_tro_stat_s cn63xx;
+       struct cvmx_lmcx_tro_stat_s cn63xxp1;
+       struct cvmx_lmcx_tro_stat_s cn66xx;
+       struct cvmx_lmcx_tro_stat_s cn68xx;
+       struct cvmx_lmcx_tro_stat_s cn68xxp1;
+       struct cvmx_lmcx_tro_stat_s cnf71xx;
+};
+
+/**
+ * cvmx_lmc#_wlevel_ctl
+ */
+union cvmx_lmcx_wlevel_ctl {
+       u64 u64;
+       struct cvmx_lmcx_wlevel_ctl_s {
+               uint64_t reserved_22_63:42;
+               uint64_t rtt_nom:3;
+               uint64_t bitmask:8;
+               uint64_t or_dis:1;
+               uint64_t sset:1;
+               uint64_t lanemask:9;
+       } s;
+       struct cvmx_lmcx_wlevel_ctl_s cn61xx;
+       struct cvmx_lmcx_wlevel_ctl_s cn63xx;
+       struct cvmx_lmcx_wlevel_ctl_cn63xxp1 {
+               uint64_t reserved_10_63:54;
+               uint64_t sset:1;
+               uint64_t lanemask:9;
+       } cn63xxp1;
+       struct cvmx_lmcx_wlevel_ctl_s cn66xx;
+       struct cvmx_lmcx_wlevel_ctl_s cn68xx;
+       struct cvmx_lmcx_wlevel_ctl_s cn68xxp1;
+       struct cvmx_lmcx_wlevel_ctl_s cn70xx;
+       struct cvmx_lmcx_wlevel_ctl_s cn70xxp1;
+       struct cvmx_lmcx_wlevel_ctl_s cn73xx;
+       struct cvmx_lmcx_wlevel_ctl_s cn78xx;
+       struct cvmx_lmcx_wlevel_ctl_s cn78xxp1;
+       struct cvmx_lmcx_wlevel_ctl_s cnf71xx;
+       struct cvmx_lmcx_wlevel_ctl_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_wlevel_dbg
+ *
+ * A given write of LMC()_WLEVEL_DBG returns the write leveling pass/fail
+ * results for all possible delay settings (i.e. the BITMASK) for only one
+ * byte in the last rank that the hardware write leveled.
+ * LMC()_WLEVEL_DBG[BYTE] selects the particular byte. To get these
+ * pass/fail results for a different rank, you must run the hardware write
+ * leveling again. For example, it is possible to get the [BITMASK] results
+ * for every byte of every rank if you run write leveling separately for
+ * each rank, probing LMC()_WLEVEL_DBG between each write-leveling.
+ */
+union cvmx_lmcx_wlevel_dbg {
+       u64 u64;
+       struct cvmx_lmcx_wlevel_dbg_s {
+               uint64_t reserved_12_63:52;
+               uint64_t bitmask:8;
+               uint64_t byte:4;
+       } s;
+       struct cvmx_lmcx_wlevel_dbg_s cn61xx;
+       struct cvmx_lmcx_wlevel_dbg_s cn63xx;
+       struct cvmx_lmcx_wlevel_dbg_s cn63xxp1;
+       struct cvmx_lmcx_wlevel_dbg_s cn66xx;
+       struct cvmx_lmcx_wlevel_dbg_s cn68xx;
+       struct cvmx_lmcx_wlevel_dbg_s cn68xxp1;
+       struct cvmx_lmcx_wlevel_dbg_s cn70xx;
+       struct cvmx_lmcx_wlevel_dbg_s cn70xxp1;
+       struct cvmx_lmcx_wlevel_dbg_s cn73xx;
+       struct cvmx_lmcx_wlevel_dbg_s cn78xx;
+       struct cvmx_lmcx_wlevel_dbg_s cn78xxp1;
+       struct cvmx_lmcx_wlevel_dbg_s cnf71xx;
+       struct cvmx_lmcx_wlevel_dbg_s cnf75xx;
+};
+
+/**
+ * cvmx_lmc#_wlevel_rank#
+ *
+ * Four of these CSRs exist per LMC, one for each rank. Write level setting
+ * is measured in units of 1/8 CK, so the below BYTEn values can range over
+ * 4 CK cycles. Assuming LMC()_WLEVEL_CTL[SSET]=0, the BYTEn<2:0> values are
+ * not used during write leveling, and they are overwritten by the hardware
+ * as part of the write leveling sequence. (Hardware sets [STATUS] to 3 after
+ * hardware write leveling completes for the rank). Software needs to set
+ * BYTEn<4:3> bits.
+ *
+ * Each CSR may also be written by software, but not while a write leveling
+ * sequence is in progress. (Hardware sets [STATUS] to 1 after a CSR write.)
+ * Software initiates a hardware write-leveling sequence by programming
+ * LMC()_WLEVEL_CTL and writing RANKMASK and INIT_START=1 with SEQ_SEL=6 in
+ * LMC*0_CONFIG.
+ *
+ * LMC will then step through and accumulate write leveling results for 8
+ * unique delay settings (twice), starting at a delay of LMC()_WLEVEL_RANK()
+ * [BYTEn<4:3>]* 8 CK increasing by 1/8 CK each setting. Hardware will then
+ * set LMC()_WLEVEL_RANK()[BYTEn<2:0>] to indicate the first write leveling
+ * result of 1 that followed a result of 0 during the sequence by searching
+ * for a '1100' pattern in the generated bitmask, except that LMC will always
+ * write LMC()_WLEVEL_RANK()[BYTEn<0>]=0. If hardware is unable to find a match
+ * for a '1100' pattern, then hardware sets LMC()_WLEVEL_RANK() [BYTEn<2:0>]
+ * to 0x4. See LMC()_WLEVEL_CTL.
+ *
+ * LMC()_WLEVEL_RANKi values for ranks i without attached DRAM should be set
+ * such that they do not increase the range of possible BYTE values for any
+ * byte lane. The easiest way to do this is to set LMC()_WLEVEL_RANKi =
+ * LMC()_WLEVEL_RANKj, where j is some rank with attached DRAM whose
+ * LMC()_WLEVEL_RANKj is already fully initialized.
+ */
+union cvmx_lmcx_wlevel_rankx {
+       u64 u64;
+       struct cvmx_lmcx_wlevel_rankx_s {
+               uint64_t reserved_47_63:17;
+               uint64_t status:2;
+               uint64_t byte8:5;
+               uint64_t byte7:5;
+               uint64_t byte6:5;
+               uint64_t byte5:5;
+               uint64_t byte4:5;
+               uint64_t byte3:5;
+               uint64_t byte2:5;
+               uint64_t byte1:5;
+               uint64_t byte0:5;
+       } s;
+       struct cvmx_lmcx_wlevel_rankx_s cn61xx;
+       struct cvmx_lmcx_wlevel_rankx_s cn63xx;
+       struct cvmx_lmcx_wlevel_rankx_s cn63xxp1;
+       struct cvmx_lmcx_wlevel_rankx_s cn66xx;
+       struct cvmx_lmcx_wlevel_rankx_s cn68xx;
+       struct cvmx_lmcx_wlevel_rankx_s cn68xxp1;
+       struct cvmx_lmcx_wlevel_rankx_s cn70xx;
+       struct cvmx_lmcx_wlevel_rankx_s cn70xxp1;
+       struct cvmx_lmcx_wlevel_rankx_s cn73xx;
+       struct cvmx_lmcx_wlevel_rankx_s cn78xx;
+       struct cvmx_lmcx_wlevel_rankx_s cn78xxp1;
+       struct cvmx_lmcx_wlevel_rankx_s cnf71xx;
+       struct cvmx_lmcx_wlevel_rankx_s cnf75xx;
+};
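+
+/*
+ * Illustrative sketch, not part of this patch: one plausible software
+ * rendering of the '1100' pattern search described above, assuming bit i
+ * of the 8-bit bitmask holds the pass/fail result for the i-th 1/8-CK
+ * delay step (the hardware's exact bit ordering is not restated here).
+ * Returns the first passing step that follows two failing steps, or 0x4
+ * when no such pattern exists, as the note says hardware does.
+ */
+static inline unsigned int lmcx_wlevel_find_setting(unsigned int bitmask)
+{
+       unsigned int i;
+
+       for (i = 0; i + 3 < 8; i++) {
+               /* fail, fail, pass, pass starting at delay step i */
+               if (((bitmask >> i) & 0xf) == 0xc)
+                       return i + 2;
+       }
+
+       return 0x4;     /* no match found */
+}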
+
+/**
+ * cvmx_lmc#_wodt_ctl0
+ *
+ * LMC_WODT_CTL0 = LMC Write OnDieTermination control
+ * See the description in LMC_WODT_CTL1.
+ *
+ * Notes:
+ * Together, the LMC_WODT_CTL1 and LMC_WODT_CTL0 CSRs control the write
+ * ODT mask.  See LMC_WODT_CTL1.
+ *
+ */
+union cvmx_lmcx_wodt_ctl0 {
+       u64 u64;
+       struct cvmx_lmcx_wodt_ctl0_s {
+               uint64_t reserved_0_63:64;
+       } s;
+       struct cvmx_lmcx_wodt_ctl0_cn30xx {
+               uint64_t reserved_32_63:32;
+               uint64_t wodt_d1_r1:8;
+               uint64_t wodt_d1_r0:8;
+               uint64_t wodt_d0_r1:8;
+               uint64_t wodt_d0_r0:8;
+       } cn30xx;
+       struct cvmx_lmcx_wodt_ctl0_cn30xx cn31xx;
+       struct cvmx_lmcx_wodt_ctl0_cn38xx {
+               uint64_t reserved_32_63:32;
+               uint64_t wodt_hi3:4;
+               uint64_t wodt_hi2:4;
+               uint64_t wodt_hi1:4;
+               uint64_t wodt_hi0:4;
+               uint64_t wodt_lo3:4;
+               uint64_t wodt_lo2:4;
+               uint64_t wodt_lo1:4;
+               uint64_t wodt_lo0:4;
+       } cn38xx;
+       struct cvmx_lmcx_wodt_ctl0_cn38xx cn38xxp2;
+       struct cvmx_lmcx_wodt_ctl0_cn38xx cn50xx;
+       struct cvmx_lmcx_wodt_ctl0_cn30xx cn52xx;
+       struct cvmx_lmcx_wodt_ctl0_cn30xx cn52xxp1;
+       struct cvmx_lmcx_wodt_ctl0_cn30xx cn56xx;
+       struct cvmx_lmcx_wodt_ctl0_cn30xx cn56xxp1;
+       struct cvmx_lmcx_wodt_ctl0_cn38xx cn58xx;
+       struct cvmx_lmcx_wodt_ctl0_cn38xx cn58xxp1;
+};
+
+/**
+ * cvmx_lmc#_wodt_ctl1
+ *
+ * LMC_WODT_CTL1 = LMC Write OnDieTermination control
+ * System designers may desire to terminate DQ/DQS/DM lines for higher
+ * frequency DDR operations (667MHz and faster), especially on a multi-rank
+ * system. DDR2 DQ/DM/DQS I/O's have built in Termination resistor that can
+ * be turned on or off by the controller, after meeting tAOND and tAOF
+ * timing requirements. Each Rank has its own ODT pin that fans out to all
+ * the memory parts in that DIMM. System designers may prefer different
+ * combinations of ODT ON's for read and write into different ranks. Octeon
+ * supports full programmability by way of the mask register below.
+ * Each Rank position has its own 8-bit programmable field.
+ * When the controller does a write to that rank, it sets the 8 ODT pins
+ * to the MASK pins below. For example, when doing a write into Rank0, a system
+ * designer may desire to terminate the lines with the resistor on
+ * Dimm0/Rank1. The mask WODT_D0_R0 would then be [00000010]. If ODT feature
+ * is not desired, the DDR parts can be programmed to not look at these pins by
+ * writing 0 in QS_DIC. Octeon drives the appropriate mask values on the ODT
+ * pins by default.
+ * If this feature is not required, write 0 in this register.
+ *
+ * Notes:
+ * Together, the LMC_WODT_CTL1 and LMC_WODT_CTL0 CSRs control the write
+ * ODT mask. When a given RANK is selected, the WODT mask for that RANK
+ * is used.  The resulting WODT mask is driven to the DIMMs in the following
+ * manner:
+ *            BUNK_ENA=1     BUNK_ENA=0
+ * Mask[7] -> DIMM3, RANK1    DIMM3
+ * Mask[6] -> DIMM3, RANK0
+ * Mask[5] -> DIMM2, RANK1    DIMM2
+ * Mask[4] -> DIMM2, RANK0
+ * Mask[3] -> DIMM1, RANK1    DIMM1
+ * Mask[2] -> DIMM1, RANK0
+ * Mask[1] -> DIMM0, RANK1    DIMM0
+ * Mask[0] -> DIMM0, RANK0
+ */
+union cvmx_lmcx_wodt_ctl1 {
+       u64 u64;
+       struct cvmx_lmcx_wodt_ctl1_s {
+               uint64_t reserved_32_63:32;
+               uint64_t wodt_d3_r1:8;
+               uint64_t wodt_d3_r0:8;
+               uint64_t wodt_d2_r1:8;
+               uint64_t wodt_d2_r0:8;
+       } s;
+       struct cvmx_lmcx_wodt_ctl1_s cn30xx;
+       struct cvmx_lmcx_wodt_ctl1_s cn31xx;
+       struct cvmx_lmcx_wodt_ctl1_s cn52xx;
+       struct cvmx_lmcx_wodt_ctl1_s cn52xxp1;
+       struct cvmx_lmcx_wodt_ctl1_s cn56xx;
+       struct cvmx_lmcx_wodt_ctl1_s cn56xxp1;
+};
+
+/**
+ * cvmx_lmc#_wodt_mask
+ *
+ * System designers may desire to terminate DQ/DQS lines for higher-frequency
+ * DDR operations, especially on a multirank system. DDR3 DQ/DQS I/Os have
+ * built-in termination resistors that can be turned on or off by the
+ * controller, after meeting TAOND and TAOF timing requirements. Each rank
+ * has its own ODT pin that fans out to all of the memory parts in that DIMM.
+ * System designers may prefer different combinations of ODT ONs for write
+ * operations into different ranks. CNXXXX supports full programmability by
+ * way of the mask register below. Each rank position has its own 8-bit
+ * programmable field. When the controller does a write to that rank,
+ * it sets the four ODT pins to the mask pins below. For example, when
+ * doing a write into Rank0, a system designer may desire to terminate the
+ * lines with the resistor on DIMM0/Rank1. The mask [WODT_D0_R0] would then
+ * be [00000010].
+ *
+ * CNXXXX drives the appropriate mask values on the ODT pins by default.
+ * If this feature is not required, write 0x0 in this register. When a
+ * given RANK is selected, the WODT mask for that RANK is used. The
+ * resulting WODT mask is driven to the DIMMs in the following manner:
+ */
+union cvmx_lmcx_wodt_mask {
+       u64 u64;
+       struct cvmx_lmcx_wodt_mask_s {
+               uint64_t wodt_d3_r1:8;
+               uint64_t wodt_d3_r0:8;
+               uint64_t wodt_d2_r1:8;
+               uint64_t wodt_d2_r0:8;
+               uint64_t wodt_d1_r1:8;
+               uint64_t wodt_d1_r0:8;
+               uint64_t wodt_d0_r1:8;
+               uint64_t wodt_d0_r0:8;
+       } s;
+       struct cvmx_lmcx_wodt_mask_s cn61xx;
+       struct cvmx_lmcx_wodt_mask_s cn63xx;
+       struct cvmx_lmcx_wodt_mask_s cn63xxp1;
+       struct cvmx_lmcx_wodt_mask_s cn66xx;
+       struct cvmx_lmcx_wodt_mask_s cn68xx;
+       struct cvmx_lmcx_wodt_mask_s cn68xxp1;
+       struct cvmx_lmcx_wodt_mask_cn70xx {
+               uint64_t reserved_28_63:36;
+               uint64_t wodt_d1_r1:4;
+               uint64_t reserved_20_23:4;
+               uint64_t wodt_d1_r0:4;
+               uint64_t reserved_12_15:4;
+               uint64_t wodt_d0_r1:4;
+               uint64_t reserved_4_7:4;
+               uint64_t wodt_d0_r0:4;
+       } cn70xx;
+       struct cvmx_lmcx_wodt_mask_cn70xx cn70xxp1;
+       struct cvmx_lmcx_wodt_mask_cn70xx cn73xx;
+       struct cvmx_lmcx_wodt_mask_cn70xx cn78xx;
+       struct cvmx_lmcx_wodt_mask_cn70xx cn78xxp1;
+       struct cvmx_lmcx_wodt_mask_s cnf71xx;
+       struct cvmx_lmcx_wodt_mask_cn70xx cnf75xx;
+};
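+
+/*
+ * Illustrative sketch (not part of the register definitions): a write ODT
+ * mask is normally built with the union above and written through the LMC
+ * accessor from octeon_ddr.h.  Following the DIMM0/Rank1 example in the
+ * description, a board that wants Rank0 writes terminated at DIMM0/Rank1
+ * would use a field value of 0x02.  The CVMX_LMCX_WODT_MASK() address macro
+ * is assumed to be defined earlier in this header:
+ *
+ *	union cvmx_lmcx_wodt_mask wodt;
+ *
+ *	wodt.u64 = 0;
+ *	wodt.s.wodt_d0_r0 = 0x02;
+ *	lmc_wr(priv, CVMX_LMCX_WODT_MASK(if_num), wodt.u64);
+ */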
+
+#endif
diff --git a/arch/mips/mach-octeon/include/mach/octeon-feature.h b/arch/mips/mach-octeon/include/mach/octeon-feature.h
new file mode 100644 (file)
index 0000000..1202716
--- /dev/null
@@ -0,0 +1,442 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#ifndef __OCTEON_FEATURE_H__
+#define __OCTEON_FEATURE_H__
+
+/*
+ * Octeon models are declared after the macros in octeon-model.h with the
+ * suffix _FEATURE. The individual features are declared with the
+ * _FEATURE_ infix.
+ */
+enum octeon_feature {
+       /*
+        * Checks on the critical path are moved to the top (8 positions)
+        * so that the compiler generates one less insn than for the rest
+        * of the checks.
+        */
+       OCTEON_FEATURE_PKND, /* CN68XX uses port kinds for packet interface */
+       /* CN68XX has different fields in word0 - word2 */
+       OCTEON_FEATURE_CN68XX_WQE,
+
+       /*
+        * Features
+        */
+       /*
+        * Octeon models in the CN5XXX family and higher support atomic
+        * add instructions to memory (saa/saad)
+        */
+       OCTEON_FEATURE_SAAD,
+       /* Does this Octeon support the ZIP offload engine? */
+       OCTEON_FEATURE_ZIP,
+       /* Does this Octeon support crypto acceleration using COP2? */
+       OCTEON_FEATURE_CRYPTO,
+       /* Can crypto be enabled by calling cvmx_crypto_dormant_enable()? */
+       OCTEON_FEATURE_DORM_CRYPTO,
+       OCTEON_FEATURE_PCIE,    /* Does this Octeon support PCI express? */
+       OCTEON_FEATURE_SRIO,    /* Does this Octeon support SRIO */
+       OCTEON_FEATURE_ILK,     /* Does this Octeon support Interlaken */
+       /*
+        * Some Octeon models support internal memory for storing
+        * cryptographic keys
+        */
+       OCTEON_FEATURE_KEY_MEMORY,
+       /* Octeon has a LED controller for banks of external LEDs */
+       OCTEON_FEATURE_LED_CONTROLLER,
+       OCTEON_FEATURE_TRA,     /* Octeon has a trace buffer */
+       OCTEON_FEATURE_MGMT_PORT, /* Octeon has a management port */
+       OCTEON_FEATURE_RAID,    /* Octeon has a raid unit */
+       OCTEON_FEATURE_USB,     /* Octeon has a builtin USB */
+       /* Octeon IPD can run without using work queue entries */
+       OCTEON_FEATURE_NO_WPTR,
+       OCTEON_FEATURE_DFA,     /* Octeon has DFA state machines */
+       /*
+        * Octeon MDIO block supports clause 45 transactions for
+        * 10 Gig support
+        */
+       OCTEON_FEATURE_MDIO_CLAUSE_45,
+       /*
+        * CN52XX and CN56XX used a block named NPEI for PCIe access.
+        * Newer chips replaced this with SLI+DPI
+        */
+       OCTEON_FEATURE_NPEI,
+       OCTEON_FEATURE_HFA,     /* Octeon has DFA/HFA */
+       OCTEON_FEATURE_DFM,     /* Octeon has DFM */
+       OCTEON_FEATURE_CIU2,    /* Octeon has CIU2 */
+       /* Octeon has DMA Instruction Completion Interrupt mode */
+       OCTEON_FEATURE_DICI_MODE,
+       /* Octeon has Bit Select Extractor scheduler */
+       OCTEON_FEATURE_BIT_EXTRACTOR,
+       OCTEON_FEATURE_NAND,    /* Octeon has NAND */
+       OCTEON_FEATURE_MMC,     /* Octeon has built-in MMC support */
+       OCTEON_FEATURE_ROM,     /* Octeon has built-in ROM support */
+       OCTEON_FEATURE_AUTHENTIK, /* Octeon has Authentik ROM support */
+       OCTEON_FEATURE_MULTICAST_TIMER, /* Octeon has multi_cast timer */
+       OCTEON_FEATURE_MULTINODE, /* Octeon has node support */
+       OCTEON_FEATURE_CIU3,    /* Octeon has CIU3 */
+       OCTEON_FEATURE_FPA3,    /* Octeon has FPA first seen on 78XX */
+       /* CN78XX has different fields in word0 - word2 */
+       OCTEON_FEATURE_CN78XX_WQE,
+       OCTEON_FEATURE_PKO3,    /* Octeon has enhanced PKO block */
+       OCTEON_FEATURE_SPI,     /* Octeon supports SPI interfaces */
+       OCTEON_FEATURE_ZIP3,    /* Octeon has zip first seen on 78XX */
+       OCTEON_FEATURE_BCH,     /* Octeon supports BCH ECC */
+       OCTEON_FEATURE_PKI,     /* Octeon has PKI block */
+       OCTEON_FEATURE_OCLA,    /* Octeon has OCLA */
+       OCTEON_FEATURE_FAU,     /* Octeon has FAU */
+       OCTEON_FEATURE_BGX,     /* Octeon has BGX */
+       OCTEON_FEATURE_BGX_MIX, /* One of the BGX interfaces is used for MIX */
+       OCTEON_FEATURE_HNA,     /* Octeon has HNA */
+       OCTEON_FEATURE_BGX_XCV, /* Octeon has BGX XCV RGMII support */
+       OCTEON_FEATURE_TSO,     /* Octeon has tcp segmentation offload */
+       OCTEON_FEATURE_TDM,     /* Octeon has PCM/TDM support */
+       OCTEON_FEATURE_PTP,     /* Octeon has PTP support */
+       OCTEON_MAX_FEATURE
+};
+
+static inline int octeon_has_feature_OCTEON_FEATURE_SAAD(void)
+{
+       return true;
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_ZIP(void)
+{
+       if (OCTEON_IS_MODEL(OCTEON_CNF71XX) ||
+           OCTEON_IS_MODEL(OCTEON_CN70XX) || OCTEON_IS_MODEL(OCTEON_CNF75XX))
+               return 0;
+       else
+               return !cvmx_fuse_read(121);
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_ZIP3(void)
+{
+       return (OCTEON_IS_MODEL(OCTEON_CN78XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN73XX));
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_BCH(void)
+{
+       return (OCTEON_IS_MODEL(OCTEON_CN70XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF75XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN73XX));
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_CRYPTO(void)
+{
+       /* OCTEON II and later */
+       u64 val;
+
+       val = csr_rd(CVMX_MIO_FUS_DAT2);
+       if (val & MIO_FUS_DAT2_NOCRYPTO || val & MIO_FUS_DAT2_NOMUL)
+               return 0;
+       else if (!(val & MIO_FUS_DAT2_DORM_CRYPTO))
+               return 1;
+
+       val = csr_rd(CVMX_RNM_CTL_STATUS);
+       return val & RNM_CTL_STATUS_EER_VAL;
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_DORM_CRYPTO(void)
+{
+       /* OCTEON II and later */
+       u64 val;
+
+       val = csr_rd(CVMX_MIO_FUS_DAT2);
+       return !(val & MIO_FUS_DAT2_NOCRYPTO) && !(val & MIO_FUS_DAT2_NOMUL) &&
+               (val & MIO_FUS_DAT2_DORM_CRYPTO);
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_PCIE(void)
+{
+       /* OCTEON II and later have PCIe */
+       return true;
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_SRIO(void)
+{
+       if (OCTEON_IS_MODEL(OCTEON_CNF75XX)) {
+               if (cvmx_fuse_read(1601) == 0)
+                       return 0;
+               else
+                       return 1;
+       } else {
+               return (OCTEON_IS_MODEL(OCTEON_CN63XX) ||
+                       OCTEON_IS_MODEL(OCTEON_CN66XX));
+       }
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_ILK(void)
+{
+       return (OCTEON_IS_MODEL(OCTEON_CN68XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN78XX));
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_KEY_MEMORY(void)
+{
+       /* OCTEON II or later */
+       return true;
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_LED_CONTROLLER(void)
+{
+       return false;
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_TRA(void)
+{
+       return !OCTEON_IS_OCTEON3();
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_MGMT_PORT(void)
+{
+       /* OCTEON II or later */
+       return true;
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_RAID(void)
+{
+       return !OCTEON_IS_MODEL(OCTEON_CNF75XX);
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_USB(void)
+{
+       return true;
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_NO_WPTR(void)
+{
+       return true;
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_DFA(void)
+{
+       return 0;
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_HFA(void)
+{
+       if (OCTEON_IS_MODEL(OCTEON_CNF75XX))
+               return 0;
+       else
+               return !cvmx_fuse_read(90);
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_HNA(void)
+{
+       if (OCTEON_IS_MODEL(OCTEON_CN78XX) || OCTEON_IS_MODEL(OCTEON_CN73XX))
+               return !cvmx_fuse_read(134);
+       else
+               return 0;
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_DFM(void)
+{
+       if (!(OCTEON_IS_MODEL(OCTEON_CN63XX) || OCTEON_IS_MODEL(OCTEON_CN66XX)))
+               return 0;
+       else
+               return !cvmx_fuse_read(90);
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_MDIO_CLAUSE_45(void)
+{
+       return true;
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_NPEI(void)
+{
+       return false;
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_PKND(void)
+{
+       return OCTEON_IS_MODEL(OCTEON_CN68XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF75XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN73XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN78XX);
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_CN68XX_WQE(void)
+{
+       return OCTEON_IS_MODEL(OCTEON_CN68XX);
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_CIU2(void)
+{
+       return OCTEON_IS_MODEL(OCTEON_CN68XX);
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_CIU3(void)
+{
+       return (OCTEON_IS_MODEL(OCTEON_CN78XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF75XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN73XX));
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_FPA3(void)
+{
+       return (OCTEON_IS_MODEL(OCTEON_CN78XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF75XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN73XX));
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_NAND(void)
+{
+       return (OCTEON_IS_MODEL(OCTEON_CN63XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN66XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN68XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN73XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF75XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN70XX));
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_DICI_MODE(void)
+{
+       return (OCTEON_IS_MODEL(OCTEON_CN68XX_PASS2_X) ||
+               OCTEON_IS_MODEL(OCTEON_CN61XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF71XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN70XX));
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_BIT_EXTRACTOR(void)
+{
+       return (OCTEON_IS_MODEL(OCTEON_CN68XX_PASS2_X) ||
+               OCTEON_IS_MODEL(OCTEON_CN61XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF71XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN70XX));
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_MMC(void)
+{
+       return (OCTEON_IS_MODEL(OCTEON_CN61XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF71XX) || OCTEON_IS_OCTEON3());
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_ROM(void)
+{
+       return OCTEON_IS_MODEL(OCTEON_CN66XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN61XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF71XX);
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_AUTHENTIK(void)
+{
+       if (OCTEON_IS_MODEL(OCTEON_CN66XX) ||
+           OCTEON_IS_MODEL(OCTEON_CN61XX) ||
+           OCTEON_IS_MODEL(OCTEON_CNF71XX) ||
+           OCTEON_IS_MODEL(OCTEON_CN70XX)) {
+               u64 val;
+
+               val = csr_rd(CVMX_MIO_FUS_DAT2);
+               return (val & MIO_FUS_DAT2_NOCRYPTO) &&
+                       (val & MIO_FUS_DAT2_DORM_CRYPTO);
+       }
+
+       return 0;
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_MULTICAST_TIMER(void)
+{
+       return (OCTEON_IS_MODEL(OCTEON_CN66XX_PASS1_2) ||
+               OCTEON_IS_MODEL(OCTEON_CN61XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF71XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN70XX));
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_MULTINODE(void)
+{
+       return (!OCTEON_IS_MODEL(OCTEON_CN76XX) &&
+               OCTEON_IS_MODEL(OCTEON_CN78XX));
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_CN78XX_WQE(void)
+{
+       return (OCTEON_IS_MODEL(OCTEON_CN78XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF75XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN73XX));
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_SPI(void)
+{
+       return (OCTEON_IS_MODEL(OCTEON_CN66XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN61XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF71XX) || OCTEON_IS_OCTEON3());
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_PKI(void)
+{
+       return (OCTEON_IS_MODEL(OCTEON_CN78XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF75XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN73XX));
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_PKO3(void)
+{
+       return (OCTEON_IS_MODEL(OCTEON_CN78XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF75XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN73XX));
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_OCLA(void)
+{
+       return OCTEON_IS_OCTEON3();
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_FAU(void)
+{
+       return (!OCTEON_IS_MODEL(OCTEON_CN78XX) &&
+               !OCTEON_IS_MODEL(OCTEON_CNF75XX) &&
+               !OCTEON_IS_MODEL(OCTEON_CN73XX));
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_BGX(void)
+{
+       return (OCTEON_IS_MODEL(OCTEON_CN78XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF75XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN73XX));
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_BGX_MIX(void)
+{
+       return (OCTEON_IS_MODEL(OCTEON_CN78XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF75XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN73XX));
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_BGX_XCV(void)
+{
+       return OCTEON_IS_MODEL(OCTEON_CN73XX);
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_TSO(void)
+{
+       return (OCTEON_IS_MODEL(OCTEON_CN73XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF75XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN78XX_PASS2_X));
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_TDM(void)
+{
+       return OCTEON_IS_MODEL(OCTEON_CN61XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF71XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN70XX);
+}
+
+static inline int octeon_has_feature_OCTEON_FEATURE_PTP(void)
+{
+       return OCTEON_IS_MODEL(OCTEON_CN6XXX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF7XXX) ||
+               OCTEON_IS_MODEL(OCTEON_CN73XX) ||
+               OCTEON_IS_MODEL(OCTEON_CNF75XX) ||
+               OCTEON_IS_MODEL(OCTEON_CN78XX_PASS2_X);
+}
+
+/*
+ * Check whether the running Octeon model supports the given feature.
+ * @param feature_x one of the OCTEON_FEATURE_* enumerators
+ * @return 1 when the feature is present, 0 otherwise.
+ */
+#define octeon_has_feature(feature_x) octeon_has_feature_##feature_x()
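+
+/*
+ * Example (illustrative only): a feature check expands to the matching
+ * per-feature helper above and is evaluated against the running chip, e.g.
+ *
+ *	if (octeon_has_feature(OCTEON_FEATURE_MMC))
+ *		printf("This Octeon model has a built-in MMC controller\n");
+ */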
+
+#endif /* __OCTEON_FEATURE_H__ */
diff --git a/arch/mips/mach-octeon/include/mach/octeon-model.h b/arch/mips/mach-octeon/include/mach/octeon-model.h
new file mode 100644 (file)
index 0000000..22d6df6
--- /dev/null
@@ -0,0 +1,317 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#ifndef __OCTEON_MODEL_H__
+#define __OCTEON_MODEL_H__
+
+/*
+ * NOTE: These must match what is checked in common-config.mk
+ * Defines to represent the different versions of Octeon.
+ *
+ * IMPORTANT: When the default pass is updated for an Octeon Model,
+ * the corresponding change must also be made in the oct-sim script.
+ *
+ * The defines below should be used with the OCTEON_IS_MODEL() macro to
+ * determine what model of chip the software is running on.  Models ending
+ * in 'XX' match multiple models (families), while specific models match only
+ * that model.  If a pass (revision) is specified, then only that revision
+ * will be matched.  Care should be taken when checking for both specific
+ * models and families that the specific models are checked for first.
+ * While these defines are similar to the processor ID, they are not intended
+ * to be used by anything other than the OCTEON_IS_MODEL framework, and
+ * the values are subject to change at any time without notice.
+ *
+ * NOTE: only the OCTEON_IS_MODEL() macro/function and the OCTEON_CN* macros
+ * should be used outside of this file.  All other macros are for internal
+ * use only, and may change without notice.
+ */
+
+#define OCTEON_FAMILY_MASK      0x00ffff00
+#define OCTEON_PRID_MASK       0x00ffffff
+
+/* Flag bits in top byte */
+/* Ignores revision in model checks */
+#define OM_IGNORE_REVISION        0x01000000
+/* Check submodels */
+#define OM_CHECK_SUBMODEL         0x02000000
+/* Match all models previous than the one specified */
+#define OM_MATCH_PREVIOUS_MODELS  0x04000000
+/* Ignores the minor revision on newer parts */
+#define OM_IGNORE_MINOR_REVISION  0x08000000
+#define OM_FLAG_MASK              0xff000000
+
+/* Match all cn5XXX Octeon models. */
+#define OM_MATCH_5XXX_FAMILY_MODELS     0x20000000
+/* Match all cn6XXX Octeon models. */
+#define OM_MATCH_6XXX_FAMILY_MODELS     0x40000000
+/* Match all cnf7XXX Octeon models. */
+#define OM_MATCH_F7XXX_FAMILY_MODELS    0x80000000
+/* Match all cn7XXX Octeon models. */
+#define OM_MATCH_7XXX_FAMILY_MODELS     0x10000000
+#define OM_MATCH_FAMILY_MODELS         (OM_MATCH_5XXX_FAMILY_MODELS | \
+                                        OM_MATCH_6XXX_FAMILY_MODELS |  \
+                                        OM_MATCH_F7XXX_FAMILY_MODELS | \
+                                        OM_MATCH_7XXX_FAMILY_MODELS)
+
+/*
+ * CN7XXX models with new revision encoding
+ */
+
+#define OCTEON_CNF75XX_PASS1_0  0x000d9800
+#define OCTEON_CNF75XX_PASS1_2  0x000d9802
+#define OCTEON_CNF75XX_PASS1_3  0x000d9803
+#define OCTEON_CNF75XX          (OCTEON_CNF75XX_PASS1_0 | OM_IGNORE_REVISION)
+#define OCTEON_CNF75XX_PASS1_X                                 \
+       (OCTEON_CNF75XX_PASS1_0 | OM_IGNORE_MINOR_REVISION)
+
+#define OCTEON_CN73XX_PASS1_0   0x000d9700
+#define OCTEON_CN73XX_PASS1_1   0x000d9701
+#define OCTEON_CN73XX_PASS1_2   0x000d9702
+#define OCTEON_CN73XX_PASS1_3   0x000d9703
+#define OCTEON_CN73XX           (OCTEON_CN73XX_PASS1_0 | OM_IGNORE_REVISION)
+#define OCTEON_CN73XX_PASS1_X                                  \
+       (OCTEON_CN73XX_PASS1_0 | OM_IGNORE_MINOR_REVISION)
+
+#define OCTEON_CN72XX          OCTEON_CN73XX
+
+#define OCTEON_CN23XX          OCTEON_CN73XX
+#define OCTEON_CN23XX_PASS1_2  OCTEON_CN73XX_PASS1_2
+#define OCTEON_CN23XX_PASS1_3  OCTEON_CN73XX_PASS1_3
+
+#define OCTEON_CN70XX_PASS1_0   0x000d9600
+#define OCTEON_CN70XX_PASS1_1   0x000d9601
+#define OCTEON_CN70XX_PASS1_2   0x000d9602
+
+#define OCTEON_CN70XX_PASS2_0   0x000d9608
+
+#define OCTEON_CN70XX           (OCTEON_CN70XX_PASS1_0 | OM_IGNORE_REVISION)
+#define OCTEON_CN70XX_PASS1_X                                  \
+       (OCTEON_CN70XX_PASS1_0 | OM_IGNORE_MINOR_REVISION)
+#define OCTEON_CN70XX_PASS2_X                                  \
+       (OCTEON_CN70XX_PASS2_0 | OM_IGNORE_MINOR_REVISION)
+
+#define OCTEON_CN71XX          OCTEON_CN70XX
+
+#define OCTEON_CN78XX_PASS1_0   0x000d9500
+#define OCTEON_CN78XX_PASS1_1   0x000d9501
+#define OCTEON_CN78XX_PASS2_0   0x000d9508
+
+#define OCTEON_CN78XX           (OCTEON_CN78XX_PASS2_0 | OM_IGNORE_REVISION)
+#define OCTEON_CN78XX_PASS1_X                                  \
+       (OCTEON_CN78XX_PASS1_0 | OM_IGNORE_MINOR_REVISION)
+#define OCTEON_CN78XX_PASS2_X                                  \
+       (OCTEON_CN78XX_PASS2_0 | OM_IGNORE_MINOR_REVISION)
+
+#define OCTEON_CN76XX            (0x000d9540 | OM_CHECK_SUBMODEL)
+
+/*
+ * CNF7XXX models with new revision encoding
+ */
+#define OCTEON_CNF71XX_PASS1_0  0x000d9400
+#define OCTEON_CNF71XX_PASS1_1  0x000d9401
+
+#define OCTEON_CNF71XX          (OCTEON_CNF71XX_PASS1_0 | OM_IGNORE_REVISION)
+#define OCTEON_CNF71XX_PASS1_X                                 \
+       (OCTEON_CNF71XX_PASS1_0 | OM_IGNORE_MINOR_REVISION)
+
+/*
+ * CN6XXX models with new revision encoding
+ */
+#define OCTEON_CN68XX_PASS1_0   0x000d9100
+#define OCTEON_CN68XX_PASS1_1   0x000d9101
+#define OCTEON_CN68XX_PASS2_0   0x000d9108
+#define OCTEON_CN68XX_PASS2_1   0x000d9109
+#define OCTEON_CN68XX_PASS2_2   0x000d910a
+
+#define OCTEON_CN68XX           (OCTEON_CN68XX_PASS2_0 | OM_IGNORE_REVISION)
+#define OCTEON_CN68XX_PASS1_X                                  \
+       (OCTEON_CN68XX_PASS1_0 | OM_IGNORE_MINOR_REVISION)
+#define OCTEON_CN68XX_PASS2_X                                  \
+       (OCTEON_CN68XX_PASS2_0 | OM_IGNORE_MINOR_REVISION)
+
+#define OCTEON_CN68XX_PASS1    OCTEON_CN68XX_PASS1_X
+#define OCTEON_CN68XX_PASS2    OCTEON_CN68XX_PASS2_X
+
+#define OCTEON_CN66XX_PASS1_0   0x000d9200
+#define OCTEON_CN66XX_PASS1_2   0x000d9202
+
+#define OCTEON_CN66XX           (OCTEON_CN66XX_PASS1_0 | OM_IGNORE_REVISION)
+#define OCTEON_CN66XX_PASS1_X                                  \
+       (OCTEON_CN66XX_PASS1_0 | OM_IGNORE_MINOR_REVISION)
+
+#define OCTEON_CN63XX_PASS1_0   0x000d9000
+#define OCTEON_CN63XX_PASS1_1   0x000d9001
+#define OCTEON_CN63XX_PASS1_2   0x000d9002
+#define OCTEON_CN63XX_PASS2_0   0x000d9008
+#define OCTEON_CN63XX_PASS2_1   0x000d9009
+#define OCTEON_CN63XX_PASS2_2   0x000d900a
+
+#define OCTEON_CN63XX           (OCTEON_CN63XX_PASS2_0 | OM_IGNORE_REVISION)
+#define OCTEON_CN63XX_PASS1_X                                  \
+       (OCTEON_CN63XX_PASS1_0 | OM_IGNORE_MINOR_REVISION)
+#define OCTEON_CN63XX_PASS2_X                                  \
+       (OCTEON_CN63XX_PASS2_0 | OM_IGNORE_MINOR_REVISION)
+
+/* CN62XX is same as CN63XX with 1 MB cache */
+#define OCTEON_CN62XX           OCTEON_CN63XX
+
+#define OCTEON_CN61XX_PASS1_0   0x000d9300
+#define OCTEON_CN61XX_PASS1_1   0x000d9301
+
+#define OCTEON_CN61XX           (OCTEON_CN61XX_PASS1_0 | OM_IGNORE_REVISION)
+#define OCTEON_CN61XX_PASS1_X                                  \
+       (OCTEON_CN61XX_PASS1_0 | OM_IGNORE_MINOR_REVISION)
+
+/* CN60XX is same as CN61XX with 512 KB cache */
+#define OCTEON_CN60XX           OCTEON_CN61XX
+
+/* These match entire Octeon families (CN6XXX, CNF7XXX, CN7XXX), not individual models */
+#define OCTEON_CN6XXX                                          \
+       (OCTEON_CN63XX_PASS1_0 | OM_MATCH_6XXX_FAMILY_MODELS)
+#define OCTEON_CNF7XXX                                         \
+       (OCTEON_CNF71XX_PASS1_0 | OM_MATCH_F7XXX_FAMILY_MODELS)
+#define OCTEON_CN7XXX                                          \
+       (OCTEON_CN78XX_PASS1_0 | OM_MATCH_7XXX_FAMILY_MODELS)
+
+/*
+ * The revision byte (low byte) has two different encodings.
+ * CN3XXX:
+ *
+ *     bits
+ *     <7:5>: reserved (0)
+ *     <4>:   alternate package
+ *     <3:0>: revision
+ *
+ * CN5XXX and older models:
+ *
+ *     bits
+ *     <7>:   reserved (0)
+ *     <6>:   alternate package
+ *     <5:3>: major revision
+ *     <2:0>: minor revision
+ */
+
+/* Masks used for the various types of model/family/revision matching */
+#define OCTEON_38XX_FAMILY_MASK      0x00ffff00
+#define OCTEON_38XX_FAMILY_REV_MASK  0x00ffff0f
+#define OCTEON_38XX_MODEL_MASK       0x00ffff10
+#define OCTEON_38XX_MODEL_REV_MASK                             \
+       (OCTEON_38XX_FAMILY_REV_MASK | OCTEON_38XX_MODEL_MASK)
+
+/* CN5XXX and later use different layout of bits in the revision ID field */
+#define OCTEON_58XX_FAMILY_MASK      OCTEON_38XX_FAMILY_MASK
+#define OCTEON_58XX_FAMILY_REV_MASK  0x00ffff3f
+#define OCTEON_58XX_MODEL_MASK       0x00ffff40
+#define OCTEON_58XX_MODEL_REV_MASK                             \
+       (OCTEON_58XX_FAMILY_REV_MASK | OCTEON_58XX_MODEL_MASK)
+#define OCTEON_58XX_MODEL_MINOR_REV_MASK               \
+       (OCTEON_58XX_MODEL_REV_MASK & 0x00ffff38)
+#define OCTEON_5XXX_MODEL_MASK       0x00ff0fc0
+
+#define __OCTEON_MATCH_MASK__(X, Y, Z)              \
+       ({                                           \
+               typeof(X) x = (X);                   \
+               typeof(Y) y = (Y);                   \
+               typeof(Z) z = (Z);                   \
+               (x & z) == (y & z);                  \
+        })
+
+/*
+ * __OCTEON_IS_MODEL_COMPILE__(arg_model, chip_model)
+ * returns true if chip_model is identical to, or belongs to, the OCTEON
+ * model group specified in arg_model.
+ */
+
+/* Helper macros to make the following macro more compact */
+#define OM_MASK                        OM_FLAG_MASK
+#define OM_MATCH_MASK          __OCTEON_MATCH_MASK__
+#define OM_MATCH_PREVIOUS      OM_MATCH_PREVIOUS_MODELS
+
+#define __OCTEON_IS_MODEL_COMPILE__(A, B)                              \
+       ({                                                              \
+       typeof(A) a = (A);                                              \
+       typeof(B) b = (B);                                              \
+       (((((((a) & OM_MASK) == (OM_IGNORE_REVISION | OM_CHECK_SUBMODEL)) && \
+           OM_MATCH_MASK((b), (a), OCTEON_58XX_MODEL_MASK)) ||         \
+          ((((a) & OM_MASK) == 0) &&                                   \
+           OM_MATCH_MASK((b), (a), OCTEON_58XX_FAMILY_REV_MASK)) ||    \
+          ((((a) & OM_MASK) == OM_IGNORE_MINOR_REVISION) &&            \
+           OM_MATCH_MASK((b), (a), OCTEON_58XX_MODEL_MINOR_REV_MASK)) || \
+          ((((a) & OM_MASK) == OM_CHECK_SUBMODEL) &&                   \
+           OM_MATCH_MASK((b), (a), OCTEON_58XX_MODEL_MASK)) ||         \
+          ((((a) & OM_MASK) == OM_IGNORE_REVISION) &&                  \
+           OM_MATCH_MASK((b), (a), OCTEON_58XX_FAMILY_MASK)) ||        \
+          ((((a) & (OM_MATCH_5XXX_FAMILY_MODELS)) ==                   \
+            OM_MATCH_5XXX_FAMILY_MODELS) &&                            \
+           ((b & OCTEON_PRID_MASK) < OCTEON_CN63XX_PASS1_0)) ||        \
+          ((((a) & (OM_MATCH_6XXX_FAMILY_MODELS)) ==                   \
+            OM_MATCH_6XXX_FAMILY_MODELS) &&                            \
+           ((b & OCTEON_PRID_MASK) >= OCTEON_CN63XX_PASS1_0) &&        \
+           ((b & OCTEON_PRID_MASK) < OCTEON_CNF71XX_PASS1_0)) ||       \
+          ((((a) & (OM_MATCH_F7XXX_FAMILY_MODELS)) ==                  \
+            OM_MATCH_F7XXX_FAMILY_MODELS) &&                           \
+           ((b & OCTEON_PRID_MASK) >= OCTEON_CNF71XX_PASS1_0) &&       \
+           ((b & OCTEON_PRID_MASK) < OCTEON_CN78XX_PASS1_0)) ||        \
+          ((((a) & (OM_MATCH_7XXX_FAMILY_MODELS)) ==                   \
+            OM_MATCH_7XXX_FAMILY_MODELS) && ((b & OCTEON_PRID_MASK) >= \
+                                             OCTEON_CN78XX_PASS1_0)) || \
+          ((((a) & (OM_MATCH_PREVIOUS)) == OM_MATCH_PREVIOUS) &&       \
+           (((b) & OCTEON_58XX_MODEL_MASK) < ((a) & OCTEON_58XX_MODEL_MASK))) \
+                 )));                                                  \
+       })
+
+#ifndef __ASSEMBLY__
+
+#ifndef OCTEON_IS_MODEL
+
+static inline int __octeon_is_model_runtime_internal__(u32 model)
+{
+       u32 cpuid = read_c0_prid();
+
+       return __OCTEON_IS_MODEL_COMPILE__(model, cpuid);
+}
+
+static inline int __octeon_is_model_runtime__(u32 model)
+{
+       return __octeon_is_model_runtime_internal__(model);
+}
+
+/*
+ * The OCTEON_IS_MODEL macro should be used for all Octeon model checking done
+ * in a program.
+ * This should be kept as a runtime check whenever possible and must be
+ * conditionalized with OCTEON_IS_COMMON_BINARY() if runtime checking
+ * support is required.
+ *
+ * Use of the macro in preprocessor directives ( #if OCTEON_IS_MODEL(...) )
+ * is NOT SUPPORTED, and should be replaced with CVMX_COMPILED_FOR()
+ * I.e.:
+ * #if OCTEON_IS_MODEL(OCTEON_CN56XX)  ->  #if CVMX_COMPILED_FOR(OCTEON_CN56XX)
+ */
+#define OCTEON_IS_MODEL(x)     __octeon_is_model_runtime__(x)
+#define OCTEON_IS_COMMON_BINARY() 1
+#undef OCTEON_MODEL
+#endif
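+
+/*
+ * Example (illustrative only): runtime model checks as recommended above
+ * typically look like this; the two helper functions are hypothetical
+ * board/SoC hooks, not part of this header:
+ *
+ *	if (OCTEON_IS_MODEL(OCTEON_CN73XX))
+ *		setup_cn73xx_specifics();
+ *	else if (OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X))
+ *		apply_cn78xx_pass1_workaround();
+ */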
+
+#define OCTEON_IS_OCTEON2()                                            \
+       (OCTEON_IS_MODEL(OCTEON_CN6XXX) || OCTEON_IS_MODEL(OCTEON_CNF71XX))
+
+#define OCTEON_IS_OCTEON3()    OCTEON_IS_MODEL(OCTEON_CN7XXX)
+
+const char *octeon_model_get_string(u32 chip_id);
+const char *octeon_model_get_string_buffer(u32 chip_id, char *buffer);
+
+/**
+ * Return the Octeon family, i.e., the processor ID field of the PrID register.
+ *
+ * @return the octeon family on success, ((u32)-1) on error.
+ */
+static inline u32 cvmx_get_octeon_family(void)
+{
+       return (read_c0_prid() & OCTEON_FAMILY_MASK);
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __OCTEON_MODEL_H__ */
diff --git a/arch/mips/mach-octeon/include/mach/octeon_ddr.h b/arch/mips/mach-octeon/include/mach/octeon_ddr.h
new file mode 100644 (file)
index 0000000..4473be4
--- /dev/null
@@ -0,0 +1,982 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#ifndef __OCTEON_DDR_H_
+#define __OCTEON_DDR_H_
+
+#include <env.h>
+#include <linux/compat.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <mach/octeon-model.h>
+#include <mach/cvmx/cvmx-lmcx-defs.h>
+
+/* Mapping is done starting from 0x11800.80000000 */
+#define CVMX_L2C_CTL           0x00800000
+#define CVMX_L2C_BIG_CTL       0x00800030
+#define CVMX_L2C_TADX_INT(i)   (0x00a00028 + (((i) & 7) * 0x40000))
+#define CVMX_L2C_MCIX_INT(i)   (0x00c00028 + (((i) & 3) * 0x40000))
+
+/* Some "external" (non-LMC) registers */
+#define CVMX_IPD_CLK_COUNT             0x00014F0000000338
+#define CVMX_FPA_CLK_COUNT             0x00012800000000F0
+
+#define CVMX_NODE_MEM_SHIFT    40
+
+#define DDR_INTERFACE_MAX      4
+
+/* Private data struct */
+struct ddr_priv {
+       void __iomem *lmc_base;
+       void __iomem *l2c_base;
+
+       bool ddr_clock_initialized[DDR_INTERFACE_MAX];
+       bool ddr_memory_preserved;
+       u32 flags;
+
+       struct ram_info info;
+};
+
+/* Shortcut to convert a number of megabytes to bytes */
+#define MB(X)                  ((u64)(X) * (u64)(1024 * 1024))
+
+#define octeon_is_cpuid(x)     (__OCTEON_IS_MODEL_COMPILE__(x, read_c0_prid()))
+
+#define strtoull               simple_strtoull
+
+/* Access LMC registers */
+static inline u64 lmc_rd(struct ddr_priv *priv, u64 addr)
+{
+       return ioread64(priv->lmc_base + addr);
+}
+
+static inline void lmc_wr(struct ddr_priv *priv, u64 addr, u64 val)
+{
+       iowrite64(val, priv->lmc_base + addr);
+}
+
+/* Access L2C registers */
+static inline u64 l2c_rd(struct ddr_priv *priv, u64 addr)
+{
+       return ioread64(priv->l2c_base + addr);
+}
+
+static inline void l2c_wr(struct ddr_priv *priv, u64 addr, u64 val)
+{
+       iowrite64(val, priv->l2c_base + addr);
+}
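+
+/*
+ * Example (illustrative only): LMC CSRs are read through lmc_rd() and
+ * decoded with the unions from cvmx-lmcx-defs.h.  The
+ * CVMX_LMCX_WLEVEL_RANKX() address macro is assumed to come from
+ * cvmx-lmcx-defs.h:
+ *
+ *	union cvmx_lmcx_wlevel_rankx wl;
+ *
+ *	wl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(0, if_num));
+ *	printf("byte0 write-level delay: %d\n", (int)wl.s.byte0);
+ */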
+
+/* Access other CSR registers not located inside the LMC address space */
+static inline u64 csr_rd(u64 addr)
+{
+       void __iomem *base;
+
+       base = ioremap_nocache(addr, 0x100);
+       return ioread64(base);
+}
+
+static inline void csr_wr(u64 addr, u64 val)
+{
+       void __iomem *base;
+
+       base = ioremap_nocache(addr, 0x100);
+       return iowrite64(val, base);
+}
+
+/* "Normal" access, without any offsets and/or mapping */
+static inline u64 cvmx_read64_uint64(u64 addr)
+{
+       return readq((void *)addr);
+}
+
+static inline void cvmx_write64_uint64(u64 addr, u64 val)
+{
+       writeq(val, (void *)addr);
+}
+
+/* Failsafe mode */
+#define FLAG_FAILSAFE_MODE             0x01000
+/* Note that the DDR clock initialized flags must be contiguous */
+/* Clock for DDR 0 initialized */
+#define FLAG_DDR0_CLK_INITIALIZED      0x02000
+/* Clock for DDR 1 initialized */
+#define FLAG_DDR1_CLK_INITIALIZED      0x04000
+/* Clock for DDR 2 initialized */
+#define FLAG_DDR2_CLK_INITIALIZED      0x08000
+/* Clock for DDR 3 initialized */
+#define FLAG_DDR3_CLK_INITIALIZED      0x10000
+/* Loaded into RAM externally */
+#define FLAG_RAM_RESIDENT              0x20000
+/* Verbose DDR information */
+#define FLAG_DDR_VERBOSE               0x40000
+/* Check env. for DDR variables */
+#define FLAG_DDR_DEBUG                 0x80000
+#define FLAG_DDR_TRACE_INIT            0x100000
+#define FLAG_MEMORY_PRESERVED          0x200000
+#define FLAG_DFM_VERBOSE               0x400000
+#define FLAG_DFM_TRACE_INIT            0x800000
+/* DFM memory clock initialized */
+#define FLAG_DFM_CLK_INITIALIZED       0x1000000
+/* EEPROM clock descr. missing */
+#define FLAG_CLOCK_DESC_MISSING                0x2000000
+/* EEPROM board descr. missing */
+#define FLAG_BOARD_DESC_MISSING                0x4000000
+#define FLAG_DDR_PROMPT                        0x8000000
+
+#ifndef DDR_NO_DEBUG
+static inline int ddr_verbose(struct ddr_priv *priv)
+{
+       return !!(priv->flags & FLAG_DDR_VERBOSE);
+}
+
+static inline char *ddr_getenv_debug(struct ddr_priv *priv, char *name)
+{
+       if (priv->flags & FLAG_FAILSAFE_MODE)
+               return NULL;
+
+       if (priv->flags & FLAG_DDR_DEBUG)
+               return env_get(name);
+
+       return NULL;
+}
+#else
+static inline int ddr_verbose(void)
+{
+       return 0;
+}
+#endif
+
+/* turn the variable name into a string */
+#define CVMX_TMP_STR(x) CVMX_TMP_STR2(x)
+#define CVMX_TMP_STR2(x) #x
+
+#define CVMX_SYNC asm volatile ("sync" : : : "memory")
+
+#define CVMX_CACHE(op, address, offset)                                        \
+       asm volatile ("cache " CVMX_TMP_STR(op) ", "                    \
+                     CVMX_TMP_STR(offset) "(%[rbase])"                 \
+                     : : [rbase] "d" (address))
+
+/* unlock the state */
+#define CVMX_CACHE_WBIL2(address, offset)      \
+       CVMX_CACHE(23, address, offset)
+
+/* complete prefetches, invalidate entire dcache */
+#define CVMX_DCACHE_INVALIDATE                                 \
+       { CVMX_SYNC; asm volatile ("cache 9, 0($0)" : : ); }
+
+/**
+ * cvmx_l2c_cfg
+ *
+ * Specify the RSL base addresses for the block
+ *
+ *                  L2C_CFG = L2C Configuration
+ *
+ * Description:
+ */
+union cvmx_l2c_cfg {
+       u64 u64;
+       struct cvmx_l2c_cfg_s {
+               uint64_t reserved_20_63:44;
+               uint64_t bstrun:1;
+               uint64_t lbist:1;
+               uint64_t xor_bank:1;
+               uint64_t dpres1:1;
+               uint64_t dpres0:1;
+               uint64_t dfill_dis:1;
+               uint64_t fpexp:4;
+               uint64_t fpempty:1;
+               uint64_t fpen:1;
+               uint64_t idxalias:1;
+               uint64_t mwf_crd:4;
+               uint64_t rsp_arb_mode:1;
+               uint64_t rfb_arb_mode:1;
+               uint64_t lrf_arb_mode:1;
+       } s;
+};
+
+/**
+ * cvmx_l2c_ctl
+ *
+ * L2C_CTL = L2C Control
+ *
+ *
+ * Notes:
+ * (1) If MAXVAB is != 0, VAB_THRESH should be less than MAXVAB.
+ *
+ * (2) L2DFDBE and L2DFSBE allow software to generate L2DSBE, L2DDBE, VBFSBE,
+ * and VBFDBE errors for the purposes of testing error handling code.  When
+ * one (or both) of these bits are set a PL2 which misses in the L2 will fill
+ * with the appropriate error in the first 2 OWs of the fill. Software can
+ * determine which OW pair gets the error by choosing the desired fill order
+ * (address<6:5>).  A PL2 which hits in the L2 will not inject any errors.
+ * Therefore sending a WBIL2 prior to the PL2 is recommended to make a miss
+ * likely (if multiple processors are involved software must be careful to be
+ * sure no other processor or IO device can bring the block into the L2).
+ *
+ * To generate a VBFSBE or VBFDBE, software must first get the cache block
+ * into the cache with an error using a PL2 which misses the L2.  Then a
+ * store partial to a portion of the cache block without the error must
+ * change the block to dirty.  Then, a subsequent WBL2/WBIL2/victim will
+ * trigger the VBFSBE/VBFDBE error.
+ */
+union cvmx_l2c_ctl {
+       u64 u64;
+       struct cvmx_l2c_ctl_s {
+               uint64_t reserved_29_63:35;
+               uint64_t rdf_fast:1;
+               uint64_t disstgl2i:1;
+               uint64_t l2dfsbe:1;
+               uint64_t l2dfdbe:1;
+               uint64_t discclk:1;
+               uint64_t maxvab:4;
+               uint64_t maxlfb:4;
+               uint64_t rsp_arb_mode:1;
+               uint64_t xmc_arb_mode:1;
+               uint64_t reserved_2_13:12;
+               uint64_t disecc:1;
+               uint64_t disidxalias:1;
+       } s;
+
+       struct cvmx_l2c_ctl_cn73xx {
+               uint64_t reserved_32_63:32;
+               uint64_t ocla_qos:3;
+               uint64_t reserved_28_28:1;
+               uint64_t disstgl2i:1;
+               uint64_t reserved_25_26:2;
+               uint64_t discclk:1;
+               uint64_t reserved_16_23:8;
+               uint64_t rsp_arb_mode:1;
+               uint64_t xmc_arb_mode:1;
+               uint64_t rdf_cnt:8;
+               uint64_t reserved_4_5:2;
+               uint64_t disldwb:1;
+               uint64_t dissblkdty:1;
+               uint64_t disecc:1;
+               uint64_t disidxalias:1;
+       } cn73xx;
+
+       struct cvmx_l2c_ctl_cn73xx cn78xx;
+};
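+
+/*
+ * Example (illustrative only): the L2C CSRs defined above are accessed the
+ * same way, e.g. setting DISSBLKDTY on CN73xx:
+ *
+ *	union cvmx_l2c_ctl l2c_ctl;
+ *
+ *	l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL);
+ *	l2c_ctl.cn73xx.dissblkdty = 1;
+ *	l2c_wr(priv, CVMX_L2C_CTL, l2c_ctl.u64);
+ */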
+
+/**
+ * cvmx_l2c_big_ctl
+ *
+ * L2C_BIG_CTL = L2C Big memory control register
+ *
+ *
+ * Notes:
+ * (1) BIGRD interrupts can occur during normal operation as the PP's are
+ * allowed to prefetch to non-existent memory locations.  Therefore,
+ * BIGRD is for informational purposes only.
+ *
+ * (2) When HOLEWR/BIGWR blocks a store L2C_VER_ID, L2C_VER_PP, L2C_VER_IOB,
+ * and L2C_VER_MSC will be loaded just like a store which is blocked by VRTWR.
+ * Additionally, L2C_ERR_XMC will be loaded.
+ */
+union cvmx_l2c_big_ctl {
+       u64 u64;
+       struct cvmx_l2c_big_ctl_s {
+               uint64_t reserved_8_63:56;
+               uint64_t maxdram:4;
+               uint64_t reserved_0_3:4;
+       } s;
+       struct cvmx_l2c_big_ctl_cn61xx {
+               uint64_t reserved_8_63:56;
+               uint64_t maxdram:4;
+               uint64_t reserved_1_3:3;
+               uint64_t disable:1;
+       } cn61xx;
+       struct cvmx_l2c_big_ctl_cn61xx cn63xx;
+       struct cvmx_l2c_big_ctl_cn61xx cn66xx;
+       struct cvmx_l2c_big_ctl_cn61xx cn68xx;
+       struct cvmx_l2c_big_ctl_cn61xx cn68xxp1;
+       struct cvmx_l2c_big_ctl_cn70xx {
+               uint64_t reserved_8_63:56;
+               uint64_t maxdram:4;
+               uint64_t reserved_1_3:3;
+               uint64_t disbig:1;
+       } cn70xx;
+       struct cvmx_l2c_big_ctl_cn70xx cn70xxp1;
+       struct cvmx_l2c_big_ctl_cn70xx cn73xx;
+       struct cvmx_l2c_big_ctl_cn70xx cn78xx;
+       struct cvmx_l2c_big_ctl_cn70xx cn78xxp1;
+       struct cvmx_l2c_big_ctl_cn61xx cnf71xx;
+       struct cvmx_l2c_big_ctl_cn70xx cnf75xx;
+};
+
+struct rlevel_byte_data {
+       int delay;
+       int loop_total;
+       int loop_count;
+       int best;
+       u64 bm;
+       int bmerrs;
+       int sqerrs;
+       int bestsq;
+};
+
+#define DEBUG_VALIDATE_BITMASK 0
+#if DEBUG_VALIDATE_BITMASK
+#define debug_bitmask_print printf
+#else
+#define debug_bitmask_print(...)
+#endif
+
+#define RLEVEL_BITMASK_TRAILING_BITS_ERROR      5
+// FIXME? now less than TOOLONG
+#define RLEVEL_BITMASK_BUBBLE_BITS_ERROR        11
+#define RLEVEL_BITMASK_NARROW_ERROR             6
+#define RLEVEL_BITMASK_BLANK_ERROR              100
+#define RLEVEL_BITMASK_TOOLONG_ERROR            12
+#define RLEVEL_NONSEQUENTIAL_DELAY_ERROR        50
+#define RLEVEL_ADJACENT_DELAY_ERROR             30
+
+/*
+ * Apply a filter to the BITMASK results returned from Octeon
+ * read-leveling to determine the most likely delay result.  This
+ * computed delay may be used to qualify the delay result returned by
+ * Octeon. Accumulate an error penalty for invalid characteristics of
+ * the bitmask so that they can be used to select the most reliable
+ * results.
+ *
+ * The algorithm searches for the largest contiguous MASK within a
+ * maximum RANGE of bits beginning with the MSB.
+ *
+ * 1. a MASK with a WIDTH less than 4 will be penalized
+ * 2. Bubbles in the bitmask that occur before or after the MASK
+ *    will be penalized
+ * 3. If there are no trailing bubbles then extra bits that occur
+ *    beyond the maximum RANGE will be penalized.
+ *
+ *   +++++++++++++++++++++++++++++++++++++++++++++++++++
+ *   +                                                 +
+ *   +   e.g. bitmask = 27B00                          +
+ *   +                                                 +
+ *   +   63                  +--- mstart           0   +
+ *   +   |                   |                     |   +
+ *   +   |         +---------+     +--- fb         |   +
+ *   +   |         |  range  |     |               |   +
+ *   +   V         V         V     V               V   +
+ *   +                                                 +
+ *   +   0 0 ... 1 0 0 1 1 1 1 0 1 1 0 0 0 0 0 0 0 0   +
+ *   +                                                 +
+ *   +           ^     ^     ^                         +
+ *   +           |     | mask|                         +
+ *   +     lb ---+     +-----+                         +
+ *   +                  width                          +
+ *   +                                                 +
+ *   +++++++++++++++++++++++++++++++++++++++++++++++++++
+ */
+
+struct rlevel_bitmask {
+       u64 bm;
+       u8 mstart;
+       u8 width;
+       int errs;
+};
+
+#define MASKRANGE_BITS 6
+#define MASKRANGE      ((1 << MASKRANGE_BITS) - 1)
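+
+/*
+ * Minimal sketch of the search described above, with all of the error
+ * penalties left out: find the widest contiguous run of set bits in a
+ * read-leveling bitmask and record where it starts.  rlevel_find_mask() is
+ * a hypothetical helper shown only for illustration; the driver's real
+ * filter also restricts the search to MASKRANGE bits and scores bubbles,
+ * blanks and trailing bits.
+ *
+ *	static void rlevel_find_mask(u64 bm, struct rlevel_bitmask *rb)
+ *	{
+ *		u8 start, width;
+ *
+ *		rb->bm = bm;
+ *		rb->width = 0;
+ *		for (start = 0; start < 64; start++) {
+ *			if (!(bm & (1ull << start)))
+ *				continue;
+ *			for (width = 0; start + width < 64; width++)
+ *				if (!(bm & (1ull << (start + width))))
+ *					break;
+ *			if (width > rb->width) {
+ *				rb->mstart = start;
+ *				rb->width = width;
+ *			}
+ *			start += width;
+ *		}
+ *	}
+ */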
+
+/* data field addresses in the DDR2 SPD eeprom */
+enum ddr2_spd_addrs {
+       DDR2_SPD_BYTES_PROGRAMMED       = 0,
+       DDR2_SPD_TOTAL_BYTES            = 1,
+       DDR2_SPD_MEM_TYPE               = 2,
+       DDR2_SPD_NUM_ROW_BITS           = 3,
+       DDR2_SPD_NUM_COL_BITS           = 4,
+       DDR2_SPD_NUM_RANKS              = 5,
+       DDR2_SPD_CYCLE_CLX              = 9,
+       DDR2_SPD_CONFIG_TYPE            = 11,
+       DDR2_SPD_REFRESH                = 12,
+       DDR2_SPD_SDRAM_WIDTH            = 13,
+       DDR2_SPD_BURST_LENGTH           = 16,
+       DDR2_SPD_NUM_BANKS              = 17,
+       DDR2_SPD_CAS_LATENCY            = 18,
+       DDR2_SPD_DIMM_TYPE              = 20,
+       DDR2_SPD_CYCLE_CLX1             = 23,
+       DDR2_SPD_CYCLE_CLX2             = 25,
+       DDR2_SPD_TRP                    = 27,
+       DDR2_SPD_TRRD                   = 28,
+       DDR2_SPD_TRCD                   = 29,
+       DDR2_SPD_TRAS                   = 30,
+       DDR2_SPD_TWR                    = 36,
+       DDR2_SPD_TWTR                   = 37,
+       DDR2_SPD_TRFC_EXT               = 40,
+       DDR2_SPD_TRFC                   = 42,
+       DDR2_SPD_CHECKSUM               = 63,
+       DDR2_SPD_MFR_ID                 = 64
+};
+
+/* data field addresses in the DDR2 SPD eeprom */
+enum ddr3_spd_addrs {
+       DDR3_SPD_BYTES_PROGRAMMED                       =  0,
+       DDR3_SPD_REVISION                               =  1,
+       DDR3_SPD_KEY_BYTE_DEVICE_TYPE                   =  2,
+       DDR3_SPD_KEY_BYTE_MODULE_TYPE                   =  3,
+       DDR3_SPD_DENSITY_BANKS                          =  4,
+       DDR3_SPD_ADDRESSING_ROW_COL_BITS                =  5,
+       DDR3_SPD_NOMINAL_VOLTAGE                        =  6,
+       DDR3_SPD_MODULE_ORGANIZATION                    =  7,
+       DDR3_SPD_MEMORY_BUS_WIDTH                       =  8,
+       DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR         =  9,
+       DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND               = 10,
+       DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR                = 11,
+       DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN              = 12,
+       DDR3_SPD_CAS_LATENCIES_LSB                      = 14,
+       DDR3_SPD_CAS_LATENCIES_MSB                      = 15,
+       DDR3_SPD_MIN_CAS_LATENCY_TAAMIN                 = 16,
+       DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN              = 17,
+       DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN              = 18,
+       DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN           = 19,
+       DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN         = 20,
+       DDR3_SPD_UPPER_NIBBLES_TRAS_TRC                 = 21,
+       DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN       = 22,
+       DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN          = 23,
+       DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN       = 24,
+       DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN       = 25,
+       DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN    = 26,
+       DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN = 27,
+       DDR3_SPD_UPPER_NIBBLE_TFAW                      = 28,
+       DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN         = 29,
+       DDR3_SPD_SDRAM_OPTIONAL_FEATURES                = 30,
+       DDR3_SPD_SDRAM_THERMAL_REFRESH_OPTIONS          = 31,
+       DDR3_SPD_MODULE_THERMAL_SENSOR                  = 32,
+       DDR3_SPD_SDRAM_DEVICE_TYPE                      = 33,
+       DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN         = 34,
+       DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN            = 35,
+       DDR3_SPD_MIN_RAS_CAS_DELAY_FINE_TRCDMIN         = 36,
+       DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN    = 37,
+       DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_FINE_TRCMIN     = 38,
+       DDR3_SPD_REFERENCE_RAW_CARD                     = 62,
+       DDR3_SPD_ADDRESS_MAPPING                        = 63,
+       DDR3_SPD_REGISTER_MANUFACTURER_ID_LSB           = 65,
+       DDR3_SPD_REGISTER_MANUFACTURER_ID_MSB           = 66,
+       DDR3_SPD_REGISTER_REVISION_NUMBER               = 67,
+       DDR3_SPD_MODULE_SERIAL_NUMBER                   = 122,
+       DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE  = 126,
+       DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE  = 127,
+       DDR3_SPD_MODULE_PART_NUMBER                     = 128
+};
+
+/* data field addresses in the DDR4 SPD eeprom */
+enum ddr4_spd_addrs {
+       DDR4_SPD_BYTES_PROGRAMMED                       =  0,
+       DDR4_SPD_REVISION                               =  1,
+       DDR4_SPD_KEY_BYTE_DEVICE_TYPE                   =  2,
+       DDR4_SPD_KEY_BYTE_MODULE_TYPE                   =  3,
+       DDR4_SPD_DENSITY_BANKS                          =  4,
+       DDR4_SPD_ADDRESSING_ROW_COL_BITS                =  5,
+       DDR4_SPD_PACKAGE_TYPE                           =  6,
+       DDR4_SPD_OPTIONAL_FEATURES                      =  7,
+       DDR4_SPD_THERMAL_REFRESH_OPTIONS                =  8,
+       DDR4_SPD_OTHER_OPTIONAL_FEATURES                =  9,
+       DDR4_SPD_SECONDARY_PACKAGE_TYPE                 = 10,
+       DDR4_SPD_MODULE_NOMINAL_VOLTAGE                 = 11,
+       DDR4_SPD_MODULE_ORGANIZATION                    = 12,
+       DDR4_SPD_MODULE_MEMORY_BUS_WIDTH                = 13,
+       DDR4_SPD_MODULE_THERMAL_SENSOR                  = 14,
+       DDR4_SPD_RESERVED_BYTE15                        = 15,
+       DDR4_SPD_RESERVED_BYTE16                        = 16,
+       DDR4_SPD_TIMEBASES                              = 17,
+       DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN           = 18,
+       DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX           = 19,
+       DDR4_SPD_CAS_LATENCIES_BYTE0                    = 20,
+       DDR4_SPD_CAS_LATENCIES_BYTE1                    = 21,
+       DDR4_SPD_CAS_LATENCIES_BYTE2                    = 22,
+       DDR4_SPD_CAS_LATENCIES_BYTE3                    = 23,
+       DDR4_SPD_MIN_CAS_LATENCY_TAAMIN                 = 24,
+       DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN              = 25,
+       DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN         = 26,
+       DDR4_SPD_UPPER_NIBBLES_TRAS_TRC                 = 27,
+       DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN       = 28,
+       DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN          = 29,
+       DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN      = 30,
+       DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN      = 31,
+       DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN      = 32,
+       DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN      = 33,
+       DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN      = 34,
+       DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN      = 35,
+       DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN     = 36,
+       DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN     = 37,
+       DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN    = 38,
+       DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN    = 39,
+       DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN         = 40,
+       DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN    = 117,
+       DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN = 118,
+       DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN = 119,
+       DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN = 120,
+       DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN    = 121,
+       DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN      = 122,
+       DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN            = 123,
+       DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX          = 124,
+       DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN          = 125,
+       DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE  = 126,
+       DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE  = 127,
+       DDR4_SPD_REFERENCE_RAW_CARD                     = 130,
+       DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE           = 131,
+       DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB           = 133,
+       DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB           = 134,
+       DDR4_SPD_REGISTER_REVISION_NUMBER               = 135,
+       DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM = 136,
+       DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL      = 137,
+       DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK       = 138,
+};
+
+#define SPD_EEPROM_SIZE                (DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK + 1)
+
+struct impedence_values {
+       unsigned char *rodt_ohms;
+       unsigned char *rtt_nom_ohms;
+       unsigned char *rtt_nom_table;
+       unsigned char *rtt_wr_ohms;
+       unsigned char *dic_ohms;
+       short *drive_strength;
+       short *dqx_strength;
+};
+
+#define RODT_OHMS_COUNT        8
+#define RTT_NOM_OHMS_COUNT     8
+#define RTT_NOM_TABLE_COUNT    8
+#define RTT_WR_OHMS_COUNT      8
+#define DIC_OHMS_COUNT         3
+#define DRIVE_STRENGTH_COUNT  15
+
+/*
+ * Structure that provides DIMM information, either in the form of an SPD
+ * TWSI address, or a pointer to an array that contains SPD data. One of
+ * the two fields must be valid.
+ */
+struct dimm_config {
+       u16 spd_addrs[2]; /* TWSI address of SPD, 0 if not used */
+       u8 *spd_ptrs[2]; /* pointer to SPD data array, NULL if not used */
+       int spd_cached[2];
+       u8 spd_data[2][SPD_EEPROM_SIZE];
+};
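+
+/*
+ * Example (illustrative only): a board description typically provides the
+ * SPD TWSI addresses and leaves the data pointers NULL (or the other way
+ * around when the SPD contents are compiled in).  The addresses used here
+ * are hypothetical:
+ *
+ *	static struct dimm_config board_dimms[] = {
+ *		{ .spd_addrs = { 0x50, 0x51 } },
+ *	};
+ */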
+
+struct dimm_odt_config {
+       u8 odt_ena;            /* FIX: dqx_ctl for Octeon 3 DDR4 */
+       u64 odt_mask;          /* FIX: wodt_mask for Octeon 3 */
+       union cvmx_lmcx_modereg_params1 modereg_params1;
+       union cvmx_lmcx_modereg_params2 modereg_params2;
+       u8 qs_dic;             /* FIX: rodt_ctl for Octeon 3 */
+       u64 rodt_ctl;          /* FIX: rodt_mask for Octeon 3 */
+       u8 dic;
+};
+
+struct ddr_delay_config {
+       u32 ddr_board_delay;
+       u8 lmc_delay_clk;
+       u8 lmc_delay_cmd;
+       u8 lmc_delay_dq;
+};
+
+/*
+ * The parameters below make up the custom_lmc_config data structure.
+ * This structure is used to customize the way that the LMC DRAM
+ * Controller is configured for a particular board design.
+ *
+ * The HRM describes LMC Read Leveling which supports automatic
+ * selection of per byte-lane delays.  When measuring the read delays
+ * the LMC configuration software sweeps through a range of settings
+ * for LMC0_COMP_CTL2[RODT_CTL], the Octeon II on-die-termination
+ * resistance and LMC0_MODEREG_PARAMS1[RTT_NOM_XX], the DRAM
+ * on-die-termination resistance.  The minimum and maximum parameters
+ * for rtt_nom_idx and rodt_ctl listed below determine the ranges of
+ * ODT settings used for the measurements.  Note that for rtt_nom an
+ * index is used into a sorted table rather than the direct csr setting
+ * in order to optimize the sweep.
+ *
+ * .min_rtt_nom_idx: 1=120ohms, 2=60ohms, 3=40ohms, 4=30ohms, 5=20ohms
+ * .max_rtt_nom_idx: 1=120ohms, 2=60ohms, 3=40ohms, 4=30ohms, 5=20ohms
+ * .min_rodt_ctl: 1=20ohms, 2=30ohms, 3=40ohms, 4=60ohms, 5=120ohms
+ * .max_rodt_ctl: 1=20ohms, 2=30ohms, 3=40ohms, 4=60ohms, 5=120ohms
+ *
+ * The settings below control the Octeon II drive strength for the CK,
+ * ADD/CMD, and DQ/DQS signals.  1=24ohms, 2=26.67ohms, 3=30ohms,
+ * 4=34.3ohms, 5=40ohms, 6=48ohms, 7=60ohms.
+ *
+ * .dqx_ctl: Drive strength control for DDR_DQX/DDR_DQS_X_P/N drivers.
+ * .ck_ctl: Drive strength control for
+ * DDR_CK_X_P/DDR_DIMMX_CSX_L/DDR_DIMMX_ODT_X drivers.
+ * .cmd_ctl: Drive strength control for CMD/A/RESET_L/CKEX drivers.
+ *
+ * The LMC controller software selects the optimal CAS latency
+ * that complies with the SPD values and the frequency at which
+ * the DRAMs are operated.  When operating the DRAMs at frequencies
+ * substantially lower than their rated frequencies it might be
+ * necessary to limit the minimum CAS latency the LMC controller
+ * software is allowed to select in order to make the DRAM work
+ * reliably.
+ *
+ * .min_cas_latency: Minimum allowed CAS Latency
+ *
+ * The value used for LMC0_RLEVEL_CTL[OFFSET_EN] determines how the
+ * read-leveling information that the Octeon II gathers is interpreted
+ * to determine the per-byte read delays.
+ *
+ * .offset_en: Value used for LMC0_RLEVEL_CTL[OFFSET_EN].
+ * .offset_udimm: Value used for LMC0_RLEVEL_CTL[OFFSET] for UDIMMS.
+ * .offset_rdimm: Value used for LMC0_RLEVEL_CTL[OFFSET] for RDIMMS.
+ *
+ * The LMC configuration software sweeps through a range of ODT
+ * settings while measuring the per-byte read delays.  During those
+ * measurements the software makes an assessment of the quality of the
+ * measurements in order to determine which measurements provide the
+ * most accurate delays.  The automatic settings provide the option to
+ * allow that same assessment to determine the optimal RODT_CTL
+ * and/or RTT_NOM settings.
+ *
+ * The automatic approach might provide the best means to determine
+ * the settings used for initial poweron of a new design.  However,
+ * the final settings should be determined by board analysis, testing,
+ * and experience.
+ *
+ * .ddr_rtt_nom_auto: 1 means automatically set RTT_NOM value.
+ * .ddr_rodt_ctl_auto: 1 means automatically set RODT_CTL value.
+ *
+ * .rlevel_compute: Enables software interpretation of per-byte read
+ * delays using the measurements collected by the
+ * Octeon II rather than completely relying on the
+ * Octeon II to determine the delays.  1=software
+ * computation is recommended since a more complete
+ * analysis is implemented in software.
+ *
+ * .rlevel_comp_offset: Set to 2 unless instructed differently by Cavium.
+ *
+ * .rlevel_average_loops: Determines the number of times the read-leveling
+ * sequence is run for each rank.  The results are
+ * then averaged across the number of loops. The
+ * default setting is 1.
+ *
+ * .ddr2t_udimm:
+ * .ddr2t_rdimm: Turn on the DDR 2T mode. 2-cycle window for CMD and
+ * address. This mode helps relieve setup time pressure
+ * on the address and command bus. Please refer to
+ * Micron's tech note tn_47_01 titled DDR2-533 Memory
+ * Design Guide for Two Dimm Unbuffered Systems for
+ * physical details.
+ *
+ * .disable_sequential_delay_check: As a result of the flyby topology
+ * prescribed in the JEDEC specifications, the byte delays should
+ * maintain a consistent increasing or decreasing trend across
+ * the bytes on standard DIMMs.  This setting can be used to disable
+ * that check in unusual circumstances where the check is not
+ * useful.
+ *
+ * .maximum_adjacent_rlevel_delay_increment: An additional sequential
+ * delay check for the delays that result from the flyby
+ * topology. This value specifies the maximum difference between
+ * the delays of adjacent bytes.  A value of 0 disables this
+ * check.
+ *
+ * .fprch2 Front Porch Enable: When set, the turn-off
+ * time for the default DDR_DQ/DQS drivers is FPRCH2 CKs earlier.
+ * 00 = 0 CKs
+ * 01 = 1 CKs
+ * 10 = 2 CKs
+ *
+ * .parity: The parity input signal PAR_IN on each dimm must be
+ * strapped high or low on the board.  This bit is programmed
+ * into LMC0_DIMM_CTL[PARITY] and it must be set to match the
+ * board strapping.  This signal is typically strapped low.
+ *
+ * .mode32b: Enable 32-bit datapath mode.  Set to 1 if only 32 DQ pins
+ * are used. (cn61xx, cn71xx)
+ *
+ * .measured_vref: Set to 1 to measure VREF; set to 0 to compute VREF.
+ *
+ * .dram_connection: Set to 1 if discrete DRAMs; set to 0 if using DIMMs.
+ * This changes the algorithms used to compute VREF.
+ *
+ * .dll_write_offset: FIXME: Add description
+ * .dll_read_offset:  FIXME: Add description
+ */
+
+struct rlevel_table {
+       const char part[20];
+       int speed;
+       u64 rl_rank[4][4];
+};
+
+struct ddr3_custom_config {
+       u8 min_rtt_nom_idx;
+       u8 max_rtt_nom_idx;
+       u8 min_rodt_ctl;
+       u8 max_rodt_ctl;
+       u8 dqx_ctl;
+       u8 ck_ctl;
+       u8 cmd_ctl;
+       u8 ctl_ctl;
+       u8 min_cas_latency;
+       u8 offset_en;
+       u8 offset_udimm;
+       u8 offset_rdimm;
+       u8 rlevel_compute;
+       u8 ddr_rtt_nom_auto;
+       u8 ddr_rodt_ctl_auto;
+       u8 rlevel_comp_offset_udimm;
+       u8 rlevel_comp_offset_rdimm;
+       int8_t ptune_offset;
+       int8_t ntune_offset;
+       u8 rlevel_average_loops;
+       u8 ddr2t_udimm;
+       u8 ddr2t_rdimm;
+       u8 disable_sequential_delay_check;
+       u8 maximum_adjacent_rlevel_delay_increment;
+       u8 parity;
+       u8 fprch2;
+       u8 mode32b;
+       u8 measured_vref;
+       u8 dram_connection;
+       const int8_t *dll_write_offset;
+       const int8_t *dll_read_offset;
+       struct rlevel_table *rl_tbl;
+};
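+
+/*
+ * A board normally fills in only the fields it needs; the EBB7304 board
+ * header added in this patch contains a complete example.  A minimal,
+ * purely illustrative sketch (the values are not recommendations):
+ *
+ *   .custom_lmc_config = {
+ *           .min_rtt_nom_idx = 1,
+ *           .max_rtt_nom_idx = 5,
+ *           .min_rodt_ctl    = 1,
+ *           .max_rodt_ctl    = 5,
+ *           .ck_ctl          = ddr4_driver_34_ohm,
+ *           .cmd_ctl         = ddr4_driver_34_ohm,
+ *           .dqx_ctl         = ddr4_dqx_driver_34_ohm,
+ *   },
+ */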
+
+#define DDR_CFG_T_MAX_DIMMS     5
+
+struct ddr_conf {
+       struct dimm_config dimm_config_table[DDR_CFG_T_MAX_DIMMS];
+       struct dimm_odt_config odt_1rank_config[4];
+       struct dimm_odt_config odt_2rank_config[4];
+       struct dimm_odt_config odt_4rank_config[4];
+       struct ddr_delay_config unbuffered;
+       struct ddr_delay_config registered;
+       struct ddr3_custom_config custom_lmc_config;
+};
+
+/* Divide and round results to the nearest integer. */
+static inline u64 divide_nint(u64 dividend, u64 divisor)
+{
+	u64 quotient, remainder;
+
+	quotient  = dividend / divisor;
+	remainder = dividend % divisor;
+	return (quotient + ((remainder * 2) >= divisor));
+}
+
+/* Divide and round results up to the next higher integer. */
+static inline u64 divide_roundup(u64 dividend, u64 divisor)
+{
+       return ((dividend + divisor - 1) / divisor);
+}
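+
+/*
+ * For illustration: divide_nint(7, 2) == 4 (3 remainder 1, rounded to
+ * the nearest integer), divide_roundup(7, 2) == 4, and
+ * divide_roundup(6, 2) == 3 (exact division is not rounded up).
+ */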
+
+enum ddr_type {
+       DDR3_DRAM = 3,
+       DDR4_DRAM = 4,
+};
+
+#define rttnom_none   0         /* Rtt_Nom disabled */
+#define rttnom_60ohm  1         /* RZQ/4  = 240/4  =  60 ohms */
+#define rttnom_120ohm 2         /* RZQ/2  = 240/2  = 120 ohms */
+#define rttnom_40ohm  3         /* RZQ/6  = 240/6  =  40 ohms */
+#define rttnom_20ohm  4         /* RZQ/12 = 240/12 =  20 ohms */
+#define rttnom_30ohm  5         /* RZQ/8  = 240/8  =  30 ohms */
+#define rttnom_rsrv1  6         /* Reserved */
+#define rttnom_rsrv2  7         /* Reserved */
+
+#define rttwr_none    0         /* Dynamic ODT off */
+#define rttwr_60ohm   1         /* RZQ/4  = 240/4  =  60 ohms */
+#define rttwr_120ohm  2         /* RZQ/2  = 240/2  = 120 ohms */
+#define rttwr_rsrv1   3         /* Reserved */
+
+#define dic_40ohm     0         /* RZQ/6  = 240/6  =  40 ohms */
+#define dic_34ohm     1         /* RZQ/7  = 240/7  =  34 ohms */
+
+#define driver_24_ohm   1
+#define driver_27_ohm   2
+#define driver_30_ohm   3
+#define driver_34_ohm   4
+#define driver_40_ohm   5
+#define driver_48_ohm   6
+#define driver_60_ohm   7
+
+#define rodt_ctl_none     0
+#define rodt_ctl_20_ohm   1
+#define rodt_ctl_30_ohm   2
+#define rodt_ctl_40_ohm   3
+#define rodt_ctl_60_ohm   4
+#define rodt_ctl_120_ohm  5
+
+#define ddr4_rttnom_none   0         /* Rtt_Nom disabled */
+#define ddr4_rttnom_60ohm  1         /* RZQ/4  = 240/4  =  60 ohms */
+#define ddr4_rttnom_120ohm 2         /* RZQ/2  = 240/2  = 120 ohms */
+#define ddr4_rttnom_40ohm  3         /* RZQ/6  = 240/6  =  40 ohms */
+#define ddr4_rttnom_240ohm 4         /* RZQ/1  = 240/1  = 240 ohms */
+#define ddr4_rttnom_48ohm  5         /* RZQ/5  = 240/5  =  48 ohms */
+#define ddr4_rttnom_80ohm  6         /* RZQ/3  = 240/3  =  80 ohms */
+#define ddr4_rttnom_34ohm  7         /* RZQ/7  = 240/7  =  34 ohms */
+
+#define ddr4_rttwr_none    0         /* Dynamic ODT off */
+#define ddr4_rttwr_120ohm  1         /* RZQ/2  = 240/2  = 120 ohms */
+#define ddr4_rttwr_240ohm  2         /* RZQ/1  = 240/1  = 240 ohms */
+#define ddr4_rttwr_hiz     3         /* HiZ */
+/* This setting is available for cn78xx pass 2, and cn73xx & cnf75xx pass 1 */
+#define ddr4_rttwr_80ohm   4         /* RZQ/3  = 240/3  =  80 ohms */
+
+#define ddr4_dic_34ohm     0         /* RZQ/7  = 240/7  =  34 ohms */
+#define ddr4_dic_48ohm     1         /* RZQ/5  = 240/5  =  48 ohms */
+
+#define ddr4_rttpark_none   0         /* Rtt_Park disabled */
+#define ddr4_rttpark_60ohm  1         /* RZQ/4  = 240/4  =  60 ohms */
+#define ddr4_rttpark_120ohm 2         /* RZQ/2  = 240/2  = 120 ohms */
+#define ddr4_rttpark_40ohm  3         /* RZQ/6  = 240/6  =  40 ohms */
+#define ddr4_rttpark_240ohm 4         /* RZQ/1  = 240/1  = 240 ohms */
+#define ddr4_rttpark_48ohm  5         /* RZQ/5  = 240/5  =  48 ohms */
+#define ddr4_rttpark_80ohm  6         /* RZQ/3  = 240/3  =  80 ohms */
+#define ddr4_rttpark_34ohm  7         /* RZQ/7  = 240/7  =  34 ohms */
+
+#define ddr4_driver_26_ohm   2
+#define ddr4_driver_30_ohm   3
+#define ddr4_driver_34_ohm   4
+#define ddr4_driver_40_ohm   5
+#define ddr4_driver_48_ohm   6
+
+#define ddr4_dqx_driver_24_ohm   1
+#define ddr4_dqx_driver_27_ohm   2
+#define ddr4_dqx_driver_30_ohm   3
+#define ddr4_dqx_driver_34_ohm   4
+#define ddr4_dqx_driver_40_ohm   5
+#define ddr4_dqx_driver_48_ohm   6
+#define ddr4_dqx_driver_60_ohm   7
+
+#define ddr4_rodt_ctl_none     0
+#define ddr4_rodt_ctl_40_ohm   1
+#define ddr4_rodt_ctl_60_ohm   2
+#define ddr4_rodt_ctl_80_ohm   3
+#define ddr4_rodt_ctl_120_ohm  4
+#define ddr4_rodt_ctl_240_ohm  5
+#define ddr4_rodt_ctl_34_ohm   6
+#define ddr4_rodt_ctl_48_ohm   7
+
+#define DIMM_CONFIG_TERMINATOR { {0, 0}, {NULL, NULL} }
+
+#define SET_DDR_DLL_CTL3(field, expr)                          \
+       do {                                                    \
+               if (octeon_is_cpuid(OCTEON_CN66XX) ||           \
+                   octeon_is_cpuid(OCTEON_CN63XX))             \
+                       ddr_dll_ctl3.cn63xx.field = (expr);     \
+               else if (octeon_is_cpuid(OCTEON_CN68XX) ||      \
+                        octeon_is_cpuid(OCTEON_CN61XX) ||      \
+                        octeon_is_cpuid(OCTEON_CNF71XX))       \
+                       ddr_dll_ctl3.cn61xx.field = (expr);     \
+               else if (octeon_is_cpuid(OCTEON_CN70XX) ||      \
+                        octeon_is_cpuid(OCTEON_CN78XX))        \
+                       ddr_dll_ctl3.cn70xx.field = (expr);     \
+               else if (octeon_is_cpuid(OCTEON_CN73XX) ||      \
+                        octeon_is_cpuid(OCTEON_CNF75XX))       \
+                       ddr_dll_ctl3.cn73xx.field = (expr);     \
+               else                                            \
+                       debug("%s(): " #field                   \
+                             "not set for unknown chip\n",     \
+                             __func__);                        \
+       } while (0)
+
+#define ENCODE_DLL90_BYTE_SEL(byte_sel)                                        \
+       (octeon_is_cpuid(OCTEON_CN70XX) ? ((9 + 7 - (byte_sel)) % 9) :  \
+        ((byte_sel) + 1))
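+
+/*
+ * Usage sketch (illustrative; the union type, register accessor and
+ * field names are assumptions, not definitions from this header):
+ *
+ *   union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
+ *
+ *   ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
+ *   SET_DDR_DLL_CTL3(dll90_byte_sel, ENCODE_DLL90_BYTE_SEL(byte));
+ *   lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
+ *
+ * On CN70XX, ENCODE_DLL90_BYTE_SEL(0) evaluates to 7; on the other
+ * chips it is simply byte_sel + 1, i.e. 1.
+ */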
+
+/**
+ * If debugging is disabled the ddr_print macro is not compatible
+ * with this macro.
+ */
+# define GET_DDR_DLL_CTL3(field)               \
+       ((octeon_is_cpuid(OCTEON_CN66XX) ||     \
+         octeon_is_cpuid(OCTEON_CN63XX)) ?     \
+        ddr_dll_ctl3.cn63xx.field :            \
+        (octeon_is_cpuid(OCTEON_CN68XX) ||     \
+         octeon_is_cpuid(OCTEON_CN61XX) ||     \
+         octeon_is_cpuid(OCTEON_CNF71XX)) ?    \
+        ddr_dll_ctl3.cn61xx.field :            \
+        (octeon_is_cpuid(OCTEON_CN70XX) ||     \
+         octeon_is_cpuid(OCTEON_CN78XX)) ?     \
+        ddr_dll_ctl3.cn70xx.field :            \
+        (octeon_is_cpuid(OCTEON_CN73XX) ||     \
+         octeon_is_cpuid(OCTEON_CNF75XX)) ?    \
+        ddr_dll_ctl3.cn73xx.field : 0)
+
+extern const char *ddr3_dimm_types[];
+extern const char *ddr4_dimm_types[];
+
+extern const struct dimm_odt_config disable_odt_config[];
+
+#define RLEVEL_BYTE_BITS       6
+#define RLEVEL_BYTE_MSK                ((1ULL << 6) - 1)
+
+/* Prototypes */
+int get_ddr_type(struct dimm_config *dimm_config, int upper_dimm);
+int get_dimm_module_type(struct dimm_config *dimm_config, int upper_dimm,
+                        int ddr_type);
+int read_spd(struct dimm_config *dimm_config, int dimm_index, int spd_field);
+int read_spd_init(struct dimm_config *dimm_config, int dimm_index);
+void report_dimm(struct dimm_config *dimm_config, int upper_dimm,
+                int dimm, int if_num);
+int validate_dimm(struct ddr_priv *priv, struct dimm_config *dimm_config,
+                 int dimm_index);
+char *printable_rank_spec(char *buffer, int num_ranks, int dram_width,
+                         int spd_package);
+
+bool ddr_memory_preserved(struct ddr_priv *priv);
+
+int get_wl_rank(union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank, int byte);
+int get_rl_rank(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank, int byte);
+void upd_wl_rank(union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank, int byte,
+                int delay);
+void upd_rl_rank(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank, int byte,
+                int delay);
+
+int compute_ddr3_rlevel_delay(u8 mstart, u8 width,
+                             union cvmx_lmcx_rlevel_ctl rlevel_ctl);
+
+int encode_row_lsb_ddr3(int row_lsb);
+int encode_pbank_lsb_ddr3(int pbank_lsb);
+
+int initialize_ddr_clock(struct ddr_priv *priv, struct ddr_conf *ddr_conf,
+                        u32 cpu_hertz, u32 ddr_hertz, u32 ddr_ref_hertz,
+                        int if_num, u32 if_mask);
+
+void process_custom_dll_offsets(struct ddr_priv *priv, int if_num,
+                               const char *enable_str,
+                               const int8_t *offsets, const char *byte_str,
+                               int mode);
+int nonseq_del(struct rlevel_byte_data *rlevel_byte, int start, int end,
+              int max_adj_delay_inc);
+int roundup_ddr3_wlevel_bitmask(int bitmask);
+
+void oct3_ddr3_seq(struct ddr_priv *priv, int rank_mask, int if_num,
+                  int sequence);
+void ddr_init_seq(struct ddr_priv *priv, int rank_mask, int if_num);
+
+void rlevel_to_wlevel(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank,
+                     union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank, int byte);
+
+int validate_ddr3_rlevel_bitmask(struct rlevel_bitmask *rlevel_bitmask_p,
+                                int ddr_type);
+
+void change_dll_offset_enable(struct ddr_priv *priv, int if_num, int change);
+unsigned short load_dll_offset(struct ddr_priv *priv, int if_num,
+                              int dll_offset_mode,
+                              int byte_offset, int byte);
+
+u64 lmc_ddr3_rl_dbg_read(struct ddr_priv *priv, int if_num, int idx);
+u64 lmc_ddr3_wl_dbg_read(struct ddr_priv *priv, int if_num, int idx);
+
+void cvmx_maybe_tune_node(struct ddr_priv *priv, u32 ddr_speed);
+void cvmx_dbi_switchover(struct ddr_priv *priv);
+
+int init_octeon3_ddr3_interface(struct ddr_priv *priv,
+                               struct ddr_conf *ddr_conf,
+                               u32 ddr_hertz, u32 cpu_hertz, u32 ddr_ref_hertz,
+                               int if_num, u32 if_mask);
+
+char *lookup_env(struct ddr_priv *priv, const char *format, ...);
+char *lookup_env_ull(struct ddr_priv *priv, const char *format, ...);
+
+/* Each board provides a board-specific config table via this function */
+struct ddr_conf *octeon_ddr_conf_table_get(int *count, int *def_ddr_freq);
+
+#endif /* __OCTEON_DDR_H_ */
diff --git a/arch/mips/mach-octeon/include/mangle-port.h b/arch/mips/mach-octeon/include/mangle-port.h
new file mode 100644 (file)
index 0000000..7e95dce
--- /dev/null
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2003, 2004 Ralf Baechle
+ */
+
+#ifndef __ASM_MACH_GENERIC_MANGLE_PORT_H
+#define __ASM_MACH_GENERIC_MANGLE_PORT_H
+
+#include <asm/byteorder.h>
+
+#ifdef __BIG_ENDIAN
+
+static inline bool __should_swizzle_bits(volatile void *a)
+{
+       extern const bool octeon_should_swizzle_table[];
+       u64 did = ((u64)(uintptr_t)a >> 40) & 0xff;
+
+       return octeon_should_swizzle_table[did];
+}
+
+# define __swizzle_addr_b(port)        (port)
+# define __swizzle_addr_w(port)        (port)
+# define __swizzle_addr_l(port)        (port)
+# define __swizzle_addr_q(port)        (port)
+
+#else /* __LITTLE_ENDIAN */
+
+#define __should_swizzle_bits(a)       false
+
+static inline bool __should_swizzle_addr(u64 p)
+{
+       /* boot bus? */
+       return ((p >> 40) & 0xff) == 0;
+}
+
+# define __swizzle_addr_b(port)        \
+       (__should_swizzle_addr(port) ? (port) ^ 7 : (port))
+# define __swizzle_addr_w(port)        \
+       (__should_swizzle_addr(port) ? (port) ^ 6 : (port))
+# define __swizzle_addr_l(port)        \
+       (__should_swizzle_addr(port) ? (port) ^ 4 : (port))
+# define __swizzle_addr_q(port)        (port)
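+
+/*
+ * Example: a byte access to a boot-bus address (bits 47:40 all zero)
+ * has its low address bits flipped, e.g. __swizzle_addr_b(0x1f400000)
+ * yields 0x1f400007, while addresses outside the boot bus are passed
+ * through unchanged.
+ */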
+
+#endif /* __BIG_ENDIAN */
+
+
+# define ioswabb(a, x)         (x)
+# define __mem_ioswabb(a, x)   (x)
+# define ioswabw(a, x)         (__should_swizzle_bits(a) ? le16_to_cpu(x) : x)
+# define __mem_ioswabw(a, x)   (x)
+# define ioswabl(a, x)         (__should_swizzle_bits(a) ? le32_to_cpu(x) : x)
+# define __mem_ioswabl(a, x)   (x)
+# define ioswabq(a, x)         (__should_swizzle_bits(a) ? le64_to_cpu(x) : x)
+# define __mem_ioswabq(a, x)   (x)
+
+#endif /* __ASM_MACH_GENERIC_MANGLE_PORT_H */
index fa87cb4..56d1d22 100644 (file)
 #include <asm/mipsregs.h>
 #include <asm/addrspace.h>
 #include <asm/asm.h>
+#include <mach/octeon-model.h>
+
+#define COP0_CVMCTL_REG                $9,7    /* Cavium control */
+#define COP0_CVMMEMCTL_REG     $11,7   /* Cavium memory control */
+#define COP0_PROC_ID_REG       $15,0
 
        .set noreorder
 
 LEAF(lowlevel_init)
+
+       /* Set LMEMSZ in CVMMEMCTL register */
+       dmfc0   a0, COP0_CVMMEMCTL_REG
+       dins    a0, zero, 0, 9
+       mfc0    a4, COP0_PROC_ID_REG
+       li      a5, OCTEON_CN63XX_PASS1_0 /* Octeon cn63xx pass1 chip id */
+       bgt     a5, a4, 2f
+        ori     a0, 0x104      /* setup 4 lines of scratch */
+       ori     a6, a5, 8       /* Octeon cn63xx pass2 chip id */
+       bge     a4, a6, 2f
+        nop
+       li      a6, 4
+       ins     a0, a6, 11, 4   /* Set WBTHRESH=4 as per Core-14752 errata */
+2:
+       dmtc0   a0, COP0_CVMMEMCTL_REG
+
+       /* Set REPUN bit in CVMCTL register */
+       dmfc0   a0, COP0_CVMCTL_REG
+       ori     a0, 1<<14       /* enable fixup of unaligned mem access */
+       dmtc0   a0, COP0_CVMCTL_REG
+
        jr      ra
         nop
        END(lowlevel_init)
@@ -67,3 +93,53 @@ __dummy:
         nop
 
        END(mips_mach_early_init)
+
+LEAF(nmi_bootvector)
+
+       /*
+        * From Marvell original bootvector setup
+        */
+       mfc0    k0, CP0_STATUS
+       /* Enable 64-bit addressing, set ERL (should already be set) */
+       ori     k0, 0x84
+       mtc0    k0, CP0_STATUS
+       /* Core-14345, clear L1 Dcache virtual tags if the core hit an NMI */
+       cache   17, 0($0)
+
+       /*
+        * Needed for Linux kernel booting, otherwise it hangs while
+        * zero'ing all of CVMSEG
+        */
+       dmfc0   a0, COP0_CVMMEMCTL_REG
+       dins    a0, zero, 0, 9
+       ori     a0, 0x104       /* setup 4 lines of scratch */
+       dmtc0   a0, COP0_CVMMEMCTL_REG
+
+       /*
+        * Load parameters and entry point
+        */
+       PTR_LA  t9, nmi_handler_para
+       sync
+
+       ld      s0, 0x00(t9)
+       ld      a0, 0x08(t9)
+       ld      a1, 0x10(t9)
+       ld      a2, 0x18(t9)
+       ld      a3, 0x20(t9)
+
+       /* Finally jump to entry point (start kernel etc) */
+       j       s0
+        nop
+
+       END(nmi_bootvector)
+
+       /*
+        * Add here some space for the NMI parameters (entry point and args)
+        */
+       .globl nmi_handler_para
+nmi_handler_para:
+       .dword  0       // entry-point
+       .dword  0       // arg0
+       .dword  0       // arg1
+       .dword  0       // arg2
+       .dword  0       // arg3
index 56e50a9..611b18f 100644 (file)
@@ -3,7 +3,24 @@
  * Copyright (C) 2020 Stefan Roese <sr@denx.de>
  */
 
-/*
- * Nothing included right now. Code will be added in follow-up
- * patches.
- */
+#include <common.h>
+#include <dm.h>
+#include <ram.h>
+
+#include <mach/octeon_ddr.h>
+
+#include "board_ddr.h"
+
+#define EBB7304_DEF_DRAM_FREQ  800
+
+static struct ddr_conf board_ddr_conf[] = {
+        OCTEON_EBB7304_DDR_CONFIGURATION
+};
+
+struct ddr_conf *octeon_ddr_conf_table_get(int *count, int *def_ddr_freq)
+{
+       *count = ARRAY_SIZE(board_ddr_conf);
+       *def_ddr_freq = EBB7304_DEF_DRAM_FREQ;
+
+       return board_ddr_conf;
+}
diff --git a/board/Marvell/octeon_ebb7304/board_ddr.h b/board/Marvell/octeon_ebb7304/board_ddr.h
new file mode 100644 (file)
index 0000000..f2f3419
--- /dev/null
@@ -0,0 +1,447 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Marvell International Ltd.
+ *
+ * https://spdx.org/licenses
+ */
+
+#ifndef __BOARD_DDR_H__
+#define __BOARD_DDR_H__
+
+#define OCTEON_EBB7304_DRAM_SOCKET_CONFIGURATION0                      \
+       { {0x1050, 0x0}, {NULL, NULL} }, { {0x1051, 0x0}, {NULL, NULL} }
+#define OCTEON_EBB7304_DRAM_SOCKET_CONFIGURATION1                      \
+       { {0x1052, 0x0}, {NULL, NULL} }, { {0x1053, 0x0}, {NULL, NULL} }
+
+#define OCTEON_EBB7304_BOARD_EEPROM_TWSI_ADDR  0x56
+
+/*
+ * Local copy of these parameters to allow for customization for this
+ * board design.  The generic version resides in lib_octeon_shared.h.
+ */
+
+/* LMC0_MODEREG_PARAMS1 */
+#define OCTEON_EBB7304_MODEREG_PARAMS1_1RANK_1SLOT             \
+       {                                                       \
+               .cn78xx = {                                     \
+                       .pasr_00        = 0,                    \
+                       .asr_00         = 0,                    \
+                       .srt_00         = 0,                    \
+                       .rtt_wr_00      = ddr4_rttwr_80ohm & 3, \
+                       .rtt_wr_00_ext  = (ddr4_rttwr_80ohm >> 2) & 1,  \
+                       .dic_00         = ddr4_dic_34ohm,       \
+                       .rtt_nom_00     = 0,                    \
+                       .pasr_01        = 0,                    \
+                       .asr_01         = 0,                    \
+                       .srt_01         = 0,                    \
+                       .rtt_wr_01      = 0,                    \
+                       .dic_01         = ddr4_dic_34ohm,       \
+                       .rtt_nom_01     = 0,                    \
+                       .pasr_10        = 0,                    \
+                       .asr_10         = 0,                    \
+                       .srt_10         = 0,                    \
+                       .rtt_wr_10      = 0,                    \
+                       .dic_10         = ddr4_dic_34ohm,       \
+                       .rtt_nom_10     = 0,                    \
+                       .pasr_11        = 0,                    \
+                       .asr_11         = 0,                    \
+                       .srt_11         = 0,                    \
+                       .rtt_wr_11      = 0,                    \
+                       .dic_11         = ddr4_dic_34ohm,       \
+                       .rtt_nom_11     = 0,                    \
+               }                                               \
+       }
+
+#define OCTEON_EBB7304_MODEREG_PARAMS1_1RANK_2SLOT     \
+       {                                                       \
+               .cn78xx = {                                     \
+                       .pasr_00        = 0,                    \
+                       .asr_00         = 0,                    \
+                       .srt_00         = 0,                    \
+                       .rtt_wr_00      = ddr4_rttwr_80ohm & 3, \
+                       .rtt_wr_00_ext  = (ddr4_rttwr_80ohm >> 2) & 1,  \
+                       .dic_00         = ddr4_dic_34ohm,       \
+                       .rtt_nom_00     = 0,                    \
+                       .pasr_01        = 0,                    \
+                       .asr_01         = 0,                    \
+                       .srt_01         = 0,                    \
+                       .rtt_wr_01      = 0,                    \
+                       .dic_01         = ddr4_dic_34ohm,       \
+                       .rtt_nom_01     = 0,                    \
+                       .pasr_10        = 0,                    \
+                       .asr_10         = 0,                    \
+                       .srt_10         = 0,                    \
+                       .rtt_wr_10      = ddr4_rttwr_80ohm & 3, \
+                       .rtt_wr_10_ext  = (ddr4_rttwr_80ohm >> 2) & 1,  \
+                       .dic_10         = ddr4_dic_34ohm,       \
+                       .rtt_nom_10     = 0,                    \
+                       .pasr_11        = 0,                    \
+                       .asr_11         = 0,                    \
+                       .srt_11         = 0,                    \
+                       .rtt_wr_11      = 0,                    \
+                       .dic_11         = ddr4_dic_34ohm,       \
+                       .rtt_nom_11     = 0                     \
+               }                                               \
+       }
+
+#define OCTEON_EBB7304_MODEREG_PARAMS1_2RANK_1SLOT             \
+       {                                                       \
+               .cn78xx = {                                     \
+                       .pasr_00        = 0,                    \
+                       .asr_00         = 0,                    \
+                       .srt_00         = 0,                    \
+                       .rtt_wr_00      = ddr4_rttwr_240ohm,    \
+                       .dic_00         = ddr4_dic_34ohm,       \
+                       .rtt_nom_00     = 0,                    \
+                       .pasr_01        = 0,                    \
+                       .asr_01         = 0,                    \
+                       .srt_01         = 0,                    \
+                       .rtt_wr_01      = ddr4_rttwr_240ohm,    \
+                       .dic_01         = ddr4_dic_34ohm,       \
+                       .rtt_nom_01     = 0,                    \
+                       .pasr_10        = 0,                    \
+                       .asr_10         = 0,                    \
+                       .srt_10         = 0,                    \
+                       .dic_10         = ddr4_dic_34ohm,       \
+                       .rtt_nom_10     = 0,                    \
+                       .pasr_11        = 0,                    \
+                       .asr_11         = 0,                    \
+                       .srt_11         = 0,                    \
+                       .rtt_wr_11      = 0,                    \
+                       .dic_11         = ddr4_dic_34ohm,       \
+                       .rtt_nom_11     = 0,                    \
+               }                                               \
+       }
+
+#define OCTEON_EBB7304_MODEREG_PARAMS1_2RANK_2SLOT             \
+       {                                                       \
+               .cn78xx = {                                     \
+                       .pasr_00        = 0,                    \
+                       .asr_00         = 0,                    \
+                       .srt_00         = 0,                    \
+                       .rtt_wr_00      = ddr4_rttwr_240ohm,    \
+                       .dic_00         = ddr4_dic_34ohm,       \
+                       .rtt_nom_00     = ddr4_rttnom_120ohm,   \
+                       .pasr_01        = 0,                    \
+                       .asr_01         = 0,                    \
+                       .srt_01         = 0,                    \
+                       .rtt_wr_01      = ddr4_rttwr_240ohm,    \
+                       .dic_01         = ddr4_dic_34ohm,       \
+                       .rtt_nom_01     = ddr4_rttnom_120ohm,   \
+                       .pasr_10        = 0,                    \
+                       .asr_10         = 0,                    \
+                       .srt_10         = 0,                    \
+                       .rtt_wr_10      = ddr4_rttwr_240ohm,    \
+                       .dic_10         = ddr4_dic_34ohm,       \
+                       .rtt_nom_10     = ddr4_rttnom_120ohm,   \
+                       .pasr_11        = 0,                    \
+                       .asr_11         = 0,                    \
+                       .srt_11         = 0,                    \
+                       .rtt_wr_11      = ddr4_rttwr_240ohm,    \
+                       .dic_11         = ddr4_dic_34ohm,       \
+                       .rtt_nom_11     = ddr4_rttnom_120ohm,   \
+               }                                               \
+       }
+
+#define OCTEON_EBB7304_MODEREG_PARAMS1_4RANK_1SLOT             \
+       {                                                       \
+               .cn78xx = {                                     \
+                       .pasr_00        = 0,                    \
+                       .asr_00         = 0,                    \
+                       .srt_00         = 0,                    \
+                       .rtt_wr_00      = rttwr_60ohm,          \
+                       .dic_00         = dic_34ohm,            \
+                       .rtt_nom_00     = rttnom_20ohm,         \
+                       .pasr_01        = 0,                    \
+                       .asr_01         = 0,                    \
+                       .srt_01         = 0,                    \
+                       .rtt_wr_01      = rttwr_60ohm,          \
+                       .dic_01         = dic_34ohm,            \
+                       .rtt_nom_01     = rttnom_none,          \
+                       .pasr_10        = 0,                    \
+                       .asr_10         = 0,                    \
+                       .srt_10         = 0,                    \
+                       .rtt_wr_10      = rttwr_60ohm,          \
+                       .dic_10         = dic_34ohm,            \
+                       .rtt_nom_10     = rttnom_20ohm,         \
+                       .pasr_11        = 0,                    \
+                       .asr_11         = 0,                    \
+                       .srt_11         = 0,                    \
+                       .rtt_wr_11      = rttwr_60ohm,          \
+                       .dic_11         = dic_34ohm,            \
+                       .rtt_nom_11     = rttnom_none,          \
+               }                                               \
+       }
+
+#define OCTEON_EBB7304_MODEREG_PARAMS2_1RANK_1SLOT     \
+{                                                      \
+       .cn78xx = {                                     \
+               .rtt_park_00    = ddr4_rttpark_60ohm,   \
+               .vref_value_00  = 0x22,                 \
+               .vref_range_00  = 0,                    \
+               .rtt_park_01    = 0,                    \
+               .vref_value_01  = 0,                    \
+               .vref_range_01  = 0,                    \
+               .rtt_park_10    = 0,                    \
+               .vref_value_10  = 0,                    \
+               .vref_range_10  = 0,                    \
+               .rtt_park_11    = 0,                    \
+               .vref_value_11  = 0,                    \
+               .vref_range_11  = 0                     \
+       }                                               \
+}
+
+/* FIX */
+#define OCTEON_EBB7304_MODEREG_PARAMS2_1RANK_2SLOT     \
+{                                                      \
+       .cn78xx = {                                     \
+               .rtt_park_00    = ddr4_rttpark_48ohm,   \
+               .vref_value_00  = 0x1f,                 \
+               .vref_range_00  = 0,                    \
+               .rtt_park_01    = 0,                    \
+               .vref_value_01  = 0,                    \
+               .vref_range_01  = 0,                    \
+               .rtt_park_10    = ddr4_rttpark_48ohm,   \
+               .vref_value_10  = 0x1f,                 \
+               .vref_range_10  = 0,                    \
+               .rtt_park_11    = 0,                    \
+               .vref_value_11  = 0,                    \
+               .vref_range_11  = 0                     \
+       }                                               \
+}
+
+#define OCTEON_EBB7304_MODEREG_PARAMS2_2RANK_1SLOT     \
+{                                                      \
+       .cn78xx = {                                     \
+               .rtt_park_00    = ddr4_rttpark_120ohm,  \
+               .vref_value_00  = 0x19,                 \
+               .vref_range_00  = 0,                    \
+               .rtt_park_01    = ddr4_rttpark_120ohm,  \
+               .vref_value_01  = 0x19,                 \
+               .vref_range_01  = 0,                    \
+               .rtt_park_10    = 0,                    \
+               .vref_value_10  = 0,                    \
+               .vref_range_10  = 0,                    \
+               .rtt_park_11    = 0,                    \
+               .vref_value_11  = 0,                    \
+               .vref_range_11  = 0                     \
+       }                                               \
+}
+
+#define OCTEON_EBB7304_MODEREG_PARAMS2_2RANK_2SLOT     \
+{                                                      \
+       .cn78xx = {                                     \
+               .rtt_park_00    = ddr4_rttpark_60ohm,   \
+               .vref_value_00  = 0x19,                 \
+               .vref_range_00  = 0,                    \
+               .rtt_park_01    = ddr4_rttpark_60ohm,   \
+               .vref_value_01  = 0x19,                 \
+               .vref_range_01  = 0,                    \
+               .rtt_park_10    = ddr4_rttpark_60ohm,   \
+               .vref_value_10  = 0x19,                 \
+               .vref_range_10  = 0,                    \
+               .rtt_park_11    = ddr4_rttpark_60ohm,   \
+               .vref_value_11  = 0x19,                 \
+               .vref_range_11  = 0                     \
+       }                                               \
+}
+
+#define OCTEON_EBB7304_MODEREG_PARAMS2_4RANK_1SLOT     \
+{                                                      \
+       .cn78xx = {                                     \
+               .rtt_park_00    = ddr4_rttpark_80ohm,   \
+               .vref_value_00  = 0x1f,                 \
+               .vref_range_00  = 0,                    \
+               .rtt_park_01    = ddr4_rttpark_80ohm,   \
+               .vref_value_01  = 0x1f,                 \
+               .vref_range_01  = 0,                    \
+               .rtt_park_10    = 0,                    \
+               .vref_value_10  = 0,                    \
+               .vref_range_10  = 0,                    \
+               .rtt_park_11    = 0,                    \
+               .vref_value_11  = 0,                    \
+               .vref_range_11  = 0                     \
+       }                                               \
+}
+
+#define OCTEON_EBB7304_CN78XX_DRAM_ODT_1RANK_CONFIGURATION             \
+       /*   1 */                                                       \
+       {                                                               \
+               ddr4_dqx_driver_34_ohm,                                 \
+               0x00000000ULL,                                          \
+               OCTEON_EBB7304_MODEREG_PARAMS1_1RANK_1SLOT,             \
+               OCTEON_EBB7304_MODEREG_PARAMS2_1RANK_1SLOT,             \
+               ddr4_rodt_ctl_48_ohm,                                   \
+               0x00000000ULL,                                          \
+               0                                                       \
+       },                                                              \
+       /*   2 */                                                       \
+       {                                                               \
+               ddr4_dqx_driver_34_ohm,                                 \
+               0x00000000ULL,                                          \
+               OCTEON_EBB7304_MODEREG_PARAMS1_1RANK_2SLOT,             \
+               OCTEON_EBB7304_MODEREG_PARAMS2_1RANK_2SLOT,             \
+               ddr4_rodt_ctl_80_ohm,                                   \
+               0x00000000ULL,                                          \
+               0                                                       \
+       }
+
+#define OCTEON_EBB7304_CN78XX_DRAM_ODT_2RANK_CONFIGURATION             \
+       /*   1 */                                                       \
+       {                                                               \
+               ddr4_dqx_driver_34_ohm,                                 \
+               0x00000000ULL,                                          \
+               OCTEON_EBB7304_MODEREG_PARAMS1_2RANK_1SLOT,             \
+               OCTEON_EBB7304_MODEREG_PARAMS2_2RANK_1SLOT,             \
+               ddr4_rodt_ctl_80_ohm,                                   \
+               0x00000000ULL,                                          \
+               0                                                       \
+       },                                                              \
+       /*   2 */                                                       \
+       {                                                               \
+               ddr4_dqx_driver_34_ohm,                                 \
+               0x0c0c0303ULL,                                          \
+               OCTEON_EBB7304_MODEREG_PARAMS1_2RANK_2SLOT,             \
+               OCTEON_EBB7304_MODEREG_PARAMS2_2RANK_2SLOT,             \
+               ddr4_rodt_ctl_48_ohm,                                   \
+               0x04080102ULL,                                          \
+               0                                                       \
+       }
+
+#define OCTEON_EBB7304_CN78XX_DRAM_ODT_4RANK_CONFIGURATION             \
+       /*   1 */                                                       \
+       {                                                               \
+               ddr4_dqx_driver_34_ohm,                                 \
+               0x01030203ULL,                                          \
+               OCTEON_EBB7304_MODEREG_PARAMS1_4RANK_1SLOT,             \
+               OCTEON_EBB7304_MODEREG_PARAMS2_4RANK_1SLOT,             \
+               ddr4_rodt_ctl_48_ohm,                                   \
+               0x01010202ULL,                                          \
+               0                                                       \
+       }
+
+/*
+ * Construct a static initializer for the ddr_configuration_t variable that
+ * holds (almost) all of the information required for DDR initialization.
+ */
+
+/*
+ * The parameters below make up the custom_lmc_config data structure.
+ * This structure is used to customize the way that the LMC DRAM
+ * Controller is configured for a particular board design.
+ *
+ * Refer to the file lib_octeon_board_table_entry.h for a description
+ * of the custom board settings.  It is usually kept in the following
+ * location... arch/mips/include/asm/arch-octeon/
+ *
+ */
+
+#define OCTEON_EBB7304_DDR_CONFIGURATION                               \
+/* Interface 0 */                                                      \
+{                                                                      \
+       .custom_lmc_config = {                                          \
+               .min_rtt_nom_idx                = 1,                    \
+               .max_rtt_nom_idx                = 7,                    \
+               .min_rodt_ctl                   = 1,                    \
+               .max_rodt_ctl                   = 7,                    \
+               .ck_ctl                         = ddr4_driver_34_ohm,   \
+               .cmd_ctl                        = ddr4_driver_34_ohm,   \
+               .ctl_ctl                        = ddr4_driver_34_ohm,   \
+               .min_cas_latency                = 0,                    \
+               .offset_en                      = 1,                    \
+               .offset_udimm                   = 2,                    \
+               .offset_rdimm                   = 2,                    \
+               .ddr_rtt_nom_auto               = 0,                    \
+               .ddr_rodt_ctl_auto              = 0,                    \
+               .rlevel_comp_offset_udimm       = 0,                    \
+               .rlevel_comp_offset_rdimm       = 0,                    \
+               .rlevel_compute                 = 0,                    \
+               .ddr2t_udimm                    = 1,                    \
+               .ddr2t_rdimm                    = 1,                    \
+               .maximum_adjacent_rlevel_delay_increment = 2,           \
+               .fprch2                         = 2,                    \
+               .dll_write_offset               = NULL,                 \
+               .dll_read_offset                = NULL,                 \
+               .parity                         = 0                     \
+       },                                                              \
+       .dimm_config_table = {                                          \
+               OCTEON_EBB7304_DRAM_SOCKET_CONFIGURATION0,              \
+               DIMM_CONFIG_TERMINATOR                                  \
+       },                                                              \
+       .unbuffered = {                                                 \
+               .ddr_board_delay                = 0,                    \
+               .lmc_delay_clk                  = 0,                    \
+               .lmc_delay_cmd                  = 0,                    \
+               .lmc_delay_dq                   = 0                     \
+       },                                                              \
+       .registered = {                                                 \
+               .ddr_board_delay                = 0,                    \
+               .lmc_delay_clk                  = 0,                    \
+               .lmc_delay_cmd                  = 0,                    \
+               .lmc_delay_dq                   = 0                     \
+       },                                                              \
+       .odt_1rank_config = {                                           \
+               OCTEON_EBB7304_CN78XX_DRAM_ODT_1RANK_CONFIGURATION      \
+       },                                                              \
+       .odt_2rank_config = {                                           \
+               OCTEON_EBB7304_CN78XX_DRAM_ODT_2RANK_CONFIGURATION      \
+       },                                                              \
+       .odt_4rank_config = {                                           \
+               OCTEON_EBB7304_CN78XX_DRAM_ODT_4RANK_CONFIGURATION      \
+       }                                                               \
+},                                                                     \
+/* Interface 1 */                                                      \
+{                                                                      \
+       .custom_lmc_config = {                                          \
+               .min_rtt_nom_idx                = 1,                    \
+               .max_rtt_nom_idx                = 7,                    \
+               .min_rodt_ctl                   = 1,                    \
+               .max_rodt_ctl                   = 7,                    \
+               .ck_ctl                         = ddr4_driver_34_ohm,   \
+               .cmd_ctl                        = ddr4_driver_34_ohm,   \
+               .ctl_ctl                        = ddr4_driver_34_ohm,   \
+               .min_cas_latency                = 0,                    \
+               .offset_en                      = 1,                    \
+               .offset_udimm                   = 2,                    \
+               .offset_rdimm                   = 2,                    \
+               .ddr_rtt_nom_auto               = 0,                    \
+               .ddr_rodt_ctl_auto              = 0,                    \
+               .rlevel_comp_offset_udimm       = 0,                    \
+               .rlevel_comp_offset_rdimm       = 0,                    \
+               .rlevel_compute                 = 0,                    \
+               .ddr2t_udimm                    = 1,                    \
+               .ddr2t_rdimm                    = 1,                    \
+               .maximum_adjacent_rlevel_delay_increment = 2,           \
+               .fprch2                         = 2,                    \
+               .dll_write_offset               = NULL,                 \
+               .dll_read_offset                = NULL,                 \
+               .parity                         = 0                     \
+       },                                                              \
+       .dimm_config_table = {                                          \
+               OCTEON_EBB7304_DRAM_SOCKET_CONFIGURATION1,              \
+               DIMM_CONFIG_TERMINATOR                                  \
+       },                                                              \
+       .unbuffered = {                                                 \
+               .ddr_board_delay                = 0,                    \
+               .lmc_delay_clk                  = 0,                    \
+               .lmc_delay_cmd                  = 0,                    \
+               .lmc_delay_dq                   = 0                     \
+       },                                                              \
+       .registered = {                                                 \
+               .ddr_board_delay                = 0,                    \
+               .lmc_delay_clk                  = 0,                    \
+               .lmc_delay_cmd                  = 0,                    \
+               .lmc_delay_dq                   = 0                     \
+       },                                                              \
+       .odt_1rank_config = {                                           \
+               OCTEON_EBB7304_CN78XX_DRAM_ODT_1RANK_CONFIGURATION      \
+       },                                                              \
+       .odt_2rank_config = {                                           \
+               OCTEON_EBB7304_CN78XX_DRAM_ODT_2RANK_CONFIGURATION      \
+       },                                                              \
+       .odt_4rank_config = {                                           \
+               OCTEON_EBB7304_CN78XX_DRAM_ODT_4RANK_CONFIGURATION      \
+       }                                                               \
+},
+
+#endif /* __BOARD_DDR_H__ */
index f8d27b0..a98d73a 100644 (file)
@@ -15,12 +15,19 @@ CONFIG_HUSH_PARSER=y
 CONFIG_CMD_GPIO=y
 CONFIG_CMD_I2C=y
 CONFIG_CMD_MTD=y
+CONFIG_CMD_PART=y
 CONFIG_CMD_PCI=y
+CONFIG_CMD_USB=y
 CONFIG_CMD_DHCP=y
 CONFIG_CMD_PING=y
 CONFIG_CMD_TIME=y
+CONFIG_CMD_EXT4=y
+CONFIG_CMD_FAT=y
+CONFIG_CMD_FS_GENERIC=y
+# CONFIG_DOS_PARTITION is not set
 CONFIG_ENV_IS_IN_FLASH=y
 CONFIG_ENV_ADDR=0x1FBFE000
+CONFIG_BLK=y
 CONFIG_CLK=y
 # CONFIG_INPUT is not set
 CONFIG_MTD=y
@@ -38,6 +45,9 @@ CONFIG_SPI_FLASH_STMICRO=y
 # CONFIG_NETDEVICES is not set
 CONFIG_PCI=y
 CONFIG_DM_PCI=y
+CONFIG_RAM=y
+CONFIG_RAM_OCTEON=y
+CONFIG_RAM_OCTEON_DDR4=y
 CONFIG_DEBUG_UART_SHIFT=3
 CONFIG_DEBUG_UART_ANNOUNCE=y
 CONFIG_SYS_NS16550=y
@@ -45,4 +55,14 @@ CONFIG_SPI=y
 CONFIG_OCTEON_SPI=y
 CONFIG_SYSRESET=y
 CONFIG_SYSRESET_OCTEON=y
+CONFIG_USB=y
+CONFIG_DM_USB=y
+CONFIG_USB_XHCI_HCD=y
+CONFIG_USB_XHCI_DWC3=y
+CONFIG_USB_HOST_ETHER=y
+CONFIG_USB_ETHER_ASIX=y
+CONFIG_USB_ETHER_ASIX88179=y
+CONFIG_USB_ETHER_MCS7830=y
+CONFIG_USB_ETHER_RTL8152=y
+CONFIG_USB_ETHER_SMSC95XX=y
 CONFIG_HEXDUMP=y
index 7e6e981..a0e859a 100644 (file)
@@ -76,3 +76,4 @@ config IMXRT_SDRAM
 source "drivers/ram/rockchip/Kconfig"
 source "drivers/ram/sifive/Kconfig"
 source "drivers/ram/stm32mp1/Kconfig"
+source "drivers/ram/octeon/Kconfig"
index 769c9d6..d685a57 100644 (file)
@@ -19,3 +19,5 @@ obj-$(CONFIG_K3_J721E_DDRSS) += k3-j721e/
 obj-$(CONFIG_IMXRT_SDRAM) += imxrt_sdram.o
 
 obj-$(CONFIG_RAM_SIFIVE) += sifive/
+
+obj-$(CONFIG_ARCH_OCTEON) += octeon/
diff --git a/drivers/ram/octeon/Kconfig b/drivers/ram/octeon/Kconfig
new file mode 100644 (file)
index 0000000..eb5a120
--- /dev/null
@@ -0,0 +1,17 @@
+config RAM_OCTEON
+	bool "RAM drivers for Octeon SoCs"
+       depends on RAM && ARCH_OCTEON
+       default n
+       help
+        This enables support for RAM drivers for Octeon SoCs.
+
+if RAM_OCTEON
+
+config RAM_OCTEON_DDR4
+       bool "Octeon III DDR4 RAM support"
+       default n
+       help
+        This enables support for DDR4 RAM for Octeon III.  This does
+        not include support for Octeon CN70XX.
+
+endif # RAM_OCTEON
diff --git a/drivers/ram/octeon/Makefile b/drivers/ram/octeon/Makefile
new file mode 100644 (file)
index 0000000..27649d1
--- /dev/null
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2020 Marvell, Inc.
+#
+
+obj-$(CONFIG_RAM_OCTEON_DDR4) += octeon_ddr.o
+obj-$(CONFIG_RAM_OCTEON_DDR4) += octeon3_lmc.o
+obj-y += dimm_spd_eeprom.o
diff --git a/drivers/ram/octeon/dimm_spd_eeprom.c b/drivers/ram/octeon/dimm_spd_eeprom.c
new file mode 100644 (file)
index 0000000..30db548
--- /dev/null
@@ -0,0 +1,407 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#include <i2c.h>
+#include <ram.h>
+
+#include <mach/octeon_ddr.h>
+
+#define DEVICE_TYPE    DDR4_SPD_KEY_BYTE_DEVICE_TYPE // same for DDR3 and DDR4
+#define MODULE_TYPE    DDR4_SPD_KEY_BYTE_MODULE_TYPE // same for DDR3 and DDR4
+#define BUS_WIDTH(t)   (((t) == DDR4_DRAM) ?               \
+                        DDR4_SPD_MODULE_MEMORY_BUS_WIDTH : \
+                        DDR3_SPD_MEMORY_BUS_WIDTH)
+
+/*
+ * Allow legacy code to encode bus number in the upper bits of the address
+ * These are only supported in read_spd()
+ */
+#define OCTEON_TWSI_BUS_IN_ADDR_BIT       12
+#define OCTEON_TWSI_BUS_IN_ADDR_MASK      (15 << OCTEON_TWSI_BUS_IN_ADDR_BIT)
+#define OCTEON_TWSI_GET_BUS(addr)                      \
+       (((addr) >> OCTEON_TWSI_BUS_IN_ADDR_BIT) & 0xf)
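+
+/*
+ * For example, the board-level SPD address 0x1050 selects TWSI bus 1
+ * and EEPROM chip address 0x50, while a plain address such as 0x50
+ * refers to a device on bus 0.
+ */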
+
+const char *ddr3_dimm_types[] = {
+       /* 0000 */ "Undefined",
+       /* 0001 */ "RDIMM",
+       /* 0010 */ "UDIMM",
+       /* 0011 */ "SO-DIMM",
+       /* 0100 */ "Micro-DIMM",
+       /* 0101 */ "Mini-RDIMM",
+       /* 0110 */ "Mini-UDIMM",
+       /* 0111 */ "Mini-CDIMM",
+       /* 1000 */ "72b-SO-UDIMM",
+       /* 1001 */ "72b-SO-RDIMM",
+	/* 1010 */ "72b-SO-CDIMM",
+       /* 1011 */ "LRDIMM",
+       /* 1100 */ "16b-SO-DIMM",
+       /* 1101 */ "32b-SO-DIMM",
+       /* 1110 */ "Reserved",
+       /* 1111 */ "Reserved"
+};
+
+const char *ddr4_dimm_types[] = {
+       /* 0000 */ "Extended",
+       /* 0001 */ "RDIMM",
+       /* 0010 */ "UDIMM",
+       /* 0011 */ "SO-DIMM",
+       /* 0100 */ "LRDIMM",
+       /* 0101 */ "Mini-RDIMM",
+       /* 0110 */ "Mini-UDIMM",
+       /* 0111 */ "Reserved",
+       /* 1000 */ "72b-SO-RDIMM",
+       /* 1001 */ "72b-SO-UDIMM",
+       /* 1010 */ "Reserved",
+       /* 1011 */ "Reserved",
+       /* 1100 */ "16b-SO-DIMM",
+       /* 1101 */ "32b-SO-DIMM",
+       /* 1110 */ "Reserved",
+       /* 1111 */ "Reserved"
+};
+
+static u16 ddr3_crc16(u8 *ptr, int count)
+{
+       /* From DDR3 SPD specification */
+       int crc, i;
+
+       crc = 0;
+       while (--count >= 0) {
+               crc = crc ^ (int)*ptr++ << 8;
+               for (i = 0; i < 8; ++i) {
+                       if (crc & 0x8000)
+                               crc = crc << 1 ^ 0x1021;
+                       else
+                               crc = crc << 1;
+               }
+       }
+
+       return (crc & 0xFFFF);
+}
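+
+/*
+ * This is the CRC-16 from the JEDEC SPD specifications (polynomial
+ * 0x1021, initial value 0).  The caller compares the result against the
+ * checksum stored LSB-first in SPD bytes 126 and 127.
+ */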
+
+static int validate_spd_checksum_ddr4(struct dimm_config *dimm_config,
+                                     int dimm_index, int twsi_addr, int silent)
+{
+       u8 *spd_data = dimm_config->spd_data[dimm_index];
+       int crc_bytes = 126;
+       u16 crc_comp;
+
+       /* Check byte 0 to see how many bytes checksum is over */
+       if (spd_data[0] & 0x80)
+               crc_bytes = 117;
+
+       crc_comp = ddr3_crc16(spd_data, crc_bytes);
+
+       if (spd_data[126] == (crc_comp & 0xff) &&
+           spd_data[127] == (crc_comp >> 8))
+               return 1;
+
+       if (!silent) {
+               printf("DDR4 SPD CRC error, spd addr: 0x%x, calculated crc: 0x%04x, read crc: 0x%02x%02x\n",
+                      twsi_addr, crc_comp, spd_data[127], spd_data[126]);
+       }
+
+       return 0;
+}
+
+static int validate_spd_checksum(struct ddr_priv *priv,
+                                struct dimm_config *dimm_config,
+                                int dimm_index, int twsi_addr,
+                                int silent, u8 rv)
+{
+       if (ddr_verbose(priv))
+               debug("Validating DIMM at address 0x%x\n", twsi_addr);
+
+       if (rv >= 0x8 && rv <= 0xA)
+               printf("%s: Error: DDR2 support disabled\n", __func__);
+
+       if (rv == 0xB)
+               printf("%s: Error: DDR3 support disabled\n", __func__);
+
+       if (rv == 0xC) {
+               return validate_spd_checksum_ddr4(dimm_config, dimm_index,
+                                                 twsi_addr, silent);
+       }
+
+       if (!silent) {
+               printf("Unrecognized DIMM type: 0x%x at spd address: 0x%x\n",
+                      rv, twsi_addr);
+       }
+
+       return 0;
+}
+
+/*
+ * Read a DIMM SPD value, either using TWSI to read it from the DIMM, or
+ * from a provided array.
+ */
+int read_spd(struct dimm_config *dimm_config, int dimm_index, int spd_field)
+{
+       dimm_index = !!dimm_index;
+
+	if (spd_field >= SPD_EEPROM_SIZE) {
+		printf("ERROR: Trying to read unsupported SPD EEPROM value %d\n",
+		       spd_field);
+		return -1;
+	}
+
+       /*
+        * If pointer to data is provided, use it, otherwise read from SPD
+        * over twsi
+        */
+       if (dimm_config->spd_ptrs[dimm_index])
+               return dimm_config->spd_ptrs[dimm_index][spd_field];
+       else if (dimm_config->spd_addrs[dimm_index])
+               return dimm_config->spd_data[dimm_index][spd_field];
+
+       return -1;
+}
+
+int read_spd_init(struct dimm_config *dimm_config, int dimm_index)
+{
+       u8 busno = OCTEON_TWSI_GET_BUS(dimm_config->spd_addrs[dimm_index]);
+       u8 cmdno = dimm_config->spd_addrs[dimm_index];
+       struct udevice *dev_i2c;
+       u8 *spd_data;
+       int ret;
+
+       if (dimm_config->spd_cached[dimm_index])
+               return 0;
+
+       dimm_config->spd_cached[dimm_index] = 1;
+       spd_data = dimm_config->spd_data[dimm_index];
+
+       ret = i2c_get_chip_for_busnum(busno, cmdno, 2, &dev_i2c);
+       if (ret) {
+		debug("Cannot find SPD EEPROM: %d\n", ret);
+               return -ENODEV;
+       }
+
+       ret = dm_i2c_read(dev_i2c, 0, spd_data, SPD_EEPROM_SIZE);
+
+       return ret;
+}
+
+int validate_dimm(struct ddr_priv *priv, struct dimm_config *dimm_config,
+                 int dimm_index)
+{
+       int spd_addr;
+
+       dimm_index = !!dimm_index;  /* Normalize to 0/1 */
+       spd_addr = dimm_config->spd_addrs[dimm_index];
+
+       debug("Validating dimm %d, spd addr: 0x%02x spd ptr: %p\n",
+             dimm_index,
+             dimm_config->spd_addrs[dimm_index],
+             dimm_config->spd_ptrs[dimm_index]);
+
+       /* Only validate 'real' dimms, assume compiled in values are OK */
+       if (!dimm_config->spd_ptrs[dimm_index]) {
+               int val0, val1;
+               int dimm_type;
+               int ret;
+
+               ret = read_spd_init(dimm_config, dimm_index);
+               if (ret)
+                       return 0;
+
+               dimm_type = read_spd(dimm_config, dimm_index,
+                                    DDR2_SPD_MEM_TYPE) & 0xff;
+               switch (dimm_type) {
+               case 0x0B:              /* DDR3 */
+                       if (ddr_verbose(priv))
+                               printf("Validating DDR3 DIMM %d\n", dimm_index);
+                       val0 = read_spd(dimm_config, dimm_index,
+                                       DDR3_SPD_DENSITY_BANKS);
+                       val1 = read_spd(dimm_config, dimm_index,
+                                       DDR3_SPD_ADDRESSING_ROW_COL_BITS);
+                       if (val0 < 0 && val1 < 0) {
+                               if (ddr_verbose(priv))
+                                       printf("Error reading SPD for DIMM %d\n",
+                                              dimm_index);
+                               return 0; /* Failed to read dimm */
+                       }
+                       if (val0 == 0xff && val1 == 0xff) {
+                               if (ddr_verbose(priv))
+                                       printf("Blank or unreadable SPD for DIMM %d\n",
+                                              dimm_index);
+                               /* Blank SPD or otherwise unreadable device */
+                               return 0;
+                       }
+
+                       /* Don't treat bad checksums as fatal */
+                       validate_spd_checksum(priv, dimm_config, dimm_index,
+                                             spd_addr, 0, dimm_type);
+                       break;
+
+               case 0x0C:              /* DDR4 */
+                       if (ddr_verbose(priv))
+                               printf("Validating DDR4 DIMM %d\n", dimm_index);
+                       val0 = read_spd(dimm_config, dimm_index,
+                                       DDR4_SPD_DENSITY_BANKS);
+                       val1 = read_spd(dimm_config, dimm_index,
+                                       DDR4_SPD_ADDRESSING_ROW_COL_BITS);
+                       if (val0 < 0 && val1 < 0) {
+                               if (ddr_verbose(priv))
+                                       printf("Error reading SPD for DIMM %d\n",
+                                              dimm_index);
+                               return 0; /* Failed to read dimm */
+                       }
+                       if (val0 == 0xff && val1 == 0xff) {
+                               if (ddr_verbose(priv)) {
+                                       printf("Blank or unreadable SPD for DIMM %d\n",
+                                              dimm_index);
+                               }
+                               /* Blank SPD or otherwise unreadable device */
+                               return 0;
+                       }
+
+                       /* Don't treat bad checksums as fatal */
+                       validate_spd_checksum(priv, dimm_config, dimm_index,
+                                             spd_addr, 0, dimm_type);
+                       break;
+
+               case 0x00:
+                       /* Terminator detected. Fail silently. */
+                       return 0;
+
+               default:
+                       debug("Unknown DIMM type 0x%x for DIMM %d @ 0x%x\n",
+                             dimm_type, dimm_index,
+                             dimm_config->spd_addrs[dimm_index]);
+                       return 0;      /* Failed to read dimm */
+               }
+       }
+
+       return 1;
+}
+
+int get_ddr_type(struct dimm_config *dimm_config, int upper_dimm)
+{
+       int spd_ddr_type;
+
+       spd_ddr_type = read_spd(dimm_config, upper_dimm, DEVICE_TYPE);
+
+       debug("%s:%d spd_ddr_type=0x%02x\n", __func__, __LINE__,
+             spd_ddr_type);
+
+       /* we return only DDR4 or DDR3 */
+       return (spd_ddr_type == 0x0C) ? DDR4_DRAM : DDR3_DRAM;
+}
+
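+/* Bit 3 of the SPD bus-width byte indicates an 8-bit ECC extension */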
+static int get_dimm_ecc(struct dimm_config *dimm_config, int upper_dimm,
+                       int ddr_type)
+{
+       return !!(read_spd(dimm_config, upper_dimm, BUS_WIDTH(ddr_type)) & 8);
+}
+
+int get_dimm_module_type(struct dimm_config *dimm_config, int upper_dimm,
+                        int ddr_type)
+{
+       return read_spd(dimm_config, upper_dimm, MODULE_TYPE) & 0x0f;
+}
+
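+/*
+ * Format a human-readable rank/width string: e.g. "2Rx8" for a monolithic
+ * two-rank x8 module, "2DRx4" for a dual-die package, or "2S4Rx4" for a
+ * 3DS stack with four dies per package.
+ */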
+char *printable_rank_spec(char *buffer, int num_ranks, int dram_width,
+                         int spd_package)
+{
+       int die_count = ((spd_package >> 4) & 7) + 1;
+
+       if (spd_package & 0x80) { // non-monolithic
+               if ((spd_package & 3) == 2) { // 3DS
+                       sprintf(buffer, "%dS%dRx%d", num_ranks, die_count,
+                               dram_width);
+               } else { // MLS
+                       char hchar = (die_count == 2) ? 'D' : 'Q';
+
+                       sprintf(buffer, "%d%cRx%d", num_ranks, hchar,
+                               dram_width);
+               }
+       } else {
+               sprintf(buffer, "%dRx%d", num_ranks, dram_width);
+       }
+
+       return buffer;
+}
+
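+/*
+ * Print a one-line summary for a DIMM, along the lines of
+ * "LMC0.DIMM0: DDR4 RDIMM 2Rx4 ECC, 1.2V"; the module-type string comes
+ * from the caller-supplied dimm_types table.
+ */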
+static void report_common_dimm(struct dimm_config *dimm_config, int upper_dimm,
+                              int dimm, const char **dimm_types, int ddr_type,
+                              char *volt_str, int if_num,
+                              int num_ranks, int dram_width, int spd_package)
+{
+       unsigned int spd_module_type;
+       char rank_spec[8];
+       int spd_ecc;
+
+       spd_module_type = get_dimm_module_type(dimm_config, upper_dimm,
+                                              ddr_type);
+       spd_ecc = get_dimm_ecc(dimm_config, upper_dimm, ddr_type);
+
+       printable_rank_spec(rank_spec, num_ranks, dram_width, spd_package);
+       printf("LMC%d.DIMM%d: DDR%d %s %s %s, %s\n",
+              if_num, dimm, ddr_type, dimm_types[spd_module_type],
+              rank_spec, spd_ecc ? "ECC" : "non-ECC", volt_str);
+}
+
+static void report_ddr3_dimm(struct dimm_config *dimm_config, int upper_dimm,
+                            int dimm, int if_num)
+{
+       int spd_voltage;
+       char *volt_str;
+       int spd_org = read_spd(dimm_config, upper_dimm,
+                              DDR3_SPD_MODULE_ORGANIZATION);
+       int num_ranks = 1 +  ((spd_org >> 3) & 0x7);
+       int dram_width = 4 << ((spd_org >> 0) & 0x7);
+
+       spd_voltage = read_spd(dimm_config, upper_dimm,
+                              DDR3_SPD_NOMINAL_VOLTAGE);
+       if (spd_voltage == 0 || spd_voltage & 3)
+               volt_str = "1.5V";
+       if (spd_voltage & 2)
+               volt_str = "1.35V";
+       if (spd_voltage & 4)
+               volt_str = "1.2xV";
+
+       report_common_dimm(dimm_config, upper_dimm, dimm, ddr3_dimm_types,
+                          DDR3_DRAM, volt_str, if_num,
+                          num_ranks, dram_width, /*spd_package*/0);
+}
+
+static void report_ddr4_dimm(struct dimm_config *dimm_config, int upper_dimm,
+                            int dimm, int if_num)
+{
+       int spd_voltage;
+       char *volt_str;
+       int spd_package = 0xff & read_spd(dimm_config, upper_dimm,
+                                         DDR4_SPD_PACKAGE_TYPE);
+       int spd_org     = 0xff & read_spd(dimm_config, upper_dimm,
+                                         DDR4_SPD_MODULE_ORGANIZATION);
+       int num_ranks   = 1 +  ((spd_org >> 3) & 0x7);
+       int dram_width  = 4 << ((spd_org >> 0) & 0x7);
+
+       spd_voltage = read_spd(dimm_config, upper_dimm,
+                              DDR4_SPD_MODULE_NOMINAL_VOLTAGE);
+       if (spd_voltage == 0x01 || spd_voltage & 0x02)
+               volt_str = "1.2V";
+       if (spd_voltage == 0x04 || spd_voltage & 0x08)
+               volt_str = "TBD1 V";
+       if (spd_voltage == 0x10 || spd_voltage & 0x20)
+               volt_str = "TBD2 V";
+
+       report_common_dimm(dimm_config, upper_dimm, dimm, ddr4_dimm_types,
+                          DDR4_DRAM, volt_str, if_num,
+                          num_ranks, dram_width, spd_package);
+}
+
+void report_dimm(struct dimm_config *dimm_config, int upper_dimm,
+                int dimm, int if_num)
+{
+       int ddr_type;
+
+       /* ddr_type only indicates DDR4 or DDR3 */
+       ddr_type = get_ddr_type(dimm_config, upper_dimm);
+
+       if (ddr_type == DDR4_DRAM)
+               report_ddr4_dimm(dimm_config, 0, dimm, if_num);
+       else
+               report_ddr3_dimm(dimm_config, 0, dimm, if_num);
+}
diff --git a/drivers/ram/octeon/octeon3_lmc.c b/drivers/ram/octeon/octeon3_lmc.c
new file mode 100644 (file)
index 0000000..327cdc5
--- /dev/null
@@ -0,0 +1,11030 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#include <command.h>
+#include <dm.h>
+#include <hang.h>
+#include <i2c.h>
+#include <ram.h>
+#include <time.h>
+
+#include <linux/bitops.h>
+#include <linux/io.h>
+
+#include <mach/octeon_ddr.h>
+
+/* Random number generator stuff */
+
+#define CVMX_RNM_CTL_STATUS    0x0001180040000000
+#define CVMX_OCT_DID_RNG       8ULL
+
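+/*
+ * Build an Octeon I/O-space physical address: bit 48 selects I/O space,
+ * the major device ID sits at bits 47:43 and the sub-DID at bits 42:40.
+ */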
+static u64 cvmx_build_io_address(u64 major_did, u64 sub_did)
+{
+       return ((0x1ull << 48) | (major_did << 43) | (sub_did << 40));
+}
+
+static u64 cvmx_rng_get_random64(void)
+{
+       return csr_rd(cvmx_build_io_address(CVMX_OCT_DID_RNG, 0));
+}
+
+static void cvmx_rng_enable(void)
+{
+       u64 val;
+
+       val = csr_rd(CVMX_RNM_CTL_STATUS);
+       val |= BIT(0) | BIT(1);
+       csr_wr(CVMX_RNM_CTL_STATUS, val);
+}
+
+#define RLEVEL_PRINTALL_DEFAULT                1
+#define WLEVEL_PRINTALL_DEFAULT                1
+
+/*
+ * Define how many HW WL samples to take for majority voting.
+ * MUST BE odd!!
+ * Assume there should only be 2 possible values that will show up,
+ * so treat ties as a problem!!!
+ * NOTE: Do not change this without checking the code!!!
+ */
+#define WLEVEL_LOOPS_DEFAULT           5
+
+#define ENABLE_COMPUTED_VREF_ADJUSTMENT        1
+#define SW_WLEVEL_HW_DEFAULT           1
+#define DEFAULT_BEST_RANK_SCORE                9999999
+#define MAX_RANK_SCORE_LIMIT           99
+
+/*
+ * Define how many HW RL samples per rank to take; multiple samples
+ * allow looking for the best sample score.
+ */
+#define RLEVEL_SAMPLES_DEFAULT         3
+
+#define ddr_seq_print(format, ...) do {} while (0)
+
+struct wlevel_bitcnt {
+       int bitcnt[4];
+};
+
+static void display_dac_dbi_settings(int lmc, int dac_or_dbi,
+                                    int ecc_ena, int *settings, char *title);
+
+static unsigned short load_dac_override(struct ddr_priv *priv, int if_num,
+                                       int dac_value, int byte);
+
+/* "mode" arg */
+#define DBTRAIN_TEST 0
+#define DBTRAIN_DBI  1
+#define DBTRAIN_LFSR 2
+
+static int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr,
+                               int mode, u64 *xor_data);
+
+#define LMC_DDR3_RESET_ASSERT   0
+#define LMC_DDR3_RESET_DEASSERT 1
+
+static void cn7xxx_lmc_ddr3_reset(struct ddr_priv *priv, int if_num, int reset)
+{
+       union cvmx_lmcx_reset_ctl reset_ctl;
+
+       /*
+        * 4. Deassert DDRn_RESET_L pin by writing
+        *    LMC(0..3)_RESET_CTL[DDR3RST] = 1
+        *    without modifying any other LMC(0..3)_RESET_CTL fields.
+        * 5. Read LMC(0..3)_RESET_CTL and wait for the result.
+        * 6. Wait a minimum of 500us. This guarantees the necessary T = 500us
+        *    delay between DDRn_RESET_L deassertion and DDRn_DIMM*_CKE*
+        *    assertion.
+        */
+       debug("LMC%d %s DDR_RESET_L\n", if_num,
+             (reset ==
+              LMC_DDR3_RESET_DEASSERT) ? "De-asserting" : "Asserting");
+
+       reset_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));
+       reset_ctl.cn78xx.ddr3rst = reset;
+       lmc_wr(priv, CVMX_LMCX_RESET_CTL(if_num), reset_ctl.u64);
+
+       lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));
+
+       udelay(500);
+}
+
+static void perform_lmc_reset(struct ddr_priv *priv, int node, int if_num)
+{
+       /*
+        * 5.9.6 LMC RESET Initialization
+        *
+        * The purpose of this step is to assert/deassert the RESET# pin at the
+        * DDR3/DDR4 parts.
+        *
+        * This LMC RESET step is done for all enabled LMCs.
+        *
+        * It may be appropriate to skip this step if the DDR3/DDR4 DRAM parts
+        * are in self refresh and are currently preserving their
+        * contents. (Software can determine this via
+        * LMC(0..3)_RESET_CTL[DDR3PSV] in some circumstances.) The remainder of
+        * this section assumes that the DRAM contents need not be preserved.
+        *
+        * The remainder of this section assumes that the CN78XX DDRn_RESET_L
+        * pin is attached to the RESET# pin of the attached DDR3/DDR4 parts,
+        * as will be appropriate in many systems.
+        *
+        * (In other systems, such as ones that can preserve DDR3/DDR4 part
+        * contents while CN78XX is powered down, it will not be appropriate to
+        * directly attach the CN78XX DDRn_RESET_L pin to DRESET# of the
+        * DDR3/DDR4 parts, and this section may not apply.)
+        *
+        * The remainder of this section describes the sequence for LMCn.
+        *
+        * Perform the following six substeps for LMC reset initialization:
+        *
+        * 1. If not done already, assert DDRn_RESET_L pin by writing
+        * LMC(0..3)_RESET_ CTL[DDR3RST] = 0 without modifying any other
+        * LMC(0..3)_RESET_CTL fields.
+        */
+
+       if (!ddr_memory_preserved(priv)) {
+               /*
+                * 2. Read LMC(0..3)_RESET_CTL and wait for the result.
+                */
+
+               lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));
+
+               /*
+                * 3. Wait until RESET# assertion-time requirement from JEDEC
+                * DDR3/DDR4 specification is satisfied (200 us during a
+                * power-on ramp, 100ns when power is already stable).
+                */
+
+               udelay(200);
+
+               /*
+                * 4. Deassert DDRn_RESET_L pin by writing
+                *    LMC(0..3)_RESET_CTL[DDR3RST] = 1
+                *    without modifying any other LMC(0..3)_RESET_CTL fields.
+                * 5. Read LMC(0..3)_RESET_CTL and wait for the result.
+                * 6. Wait a minimum of 500us. This guarantees the necessary
+                *    T = 500us delay between DDRn_RESET_L deassertion and
+                *    DDRn_DIMM*_CKE* assertion.
+                */
+               cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT);
+
+               /* Toggle Reset Again */
+               /* That is, assert, then de-assert, one more time */
+               cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_ASSERT);
+               cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT);
+       }
+}
+
+void oct3_ddr3_seq(struct ddr_priv *priv, int rank_mask, int if_num,
+                  int sequence)
+{
+       /*
+        * 3. Without changing any other fields in LMC(0)_CONFIG, write
+        *    LMC(0)_CONFIG[RANKMASK] then write both
+        *    LMC(0)_SEQ_CTL[SEQ_SEL,INIT_START] = 1 with a single CSR write
+        *    operation. LMC(0)_CONFIG[RANKMASK] bits should be set to indicate
+        *    the ranks that will participate in the sequence.
+        *
+        *    The LMC(0)_SEQ_CTL[SEQ_SEL] value should select power-up/init or
+        *    selfrefresh exit, depending on whether the DRAM parts are in
+        *    self-refresh exit, depending on whether the DRAM parts are in
+        *    LMC performs these sequences, it will not perform any other DDR3
+        *    transactions. When the sequence is complete, hardware sets the
+        *    LMC(0)_CONFIG[INIT_STATUS] bits for the ranks that have been
+        *    initialized.
+        *
+        *    If power-up/init is selected immediately following a DRESET
+        *    assertion, LMC executes the sequence described in the "Reset and
+        *    Initialization Procedure" section of the JEDEC DDR3
+        *    specification. This includes activating CKE, writing all four DDR3
+        *    mode registers on all selected ranks, and issuing the required
+        *    ZQCL
+        *    command. The LMC(0)_CONFIG[RANKMASK] value should select all ranks
+        *    with attached DRAM in this case. If LMC(0)_CONTROL[RDIMM_ENA] = 1,
+        *    LMC writes the JEDEC standard SSTE32882 control words selected by
+        *    LMC(0)_DIMM_CTL[DIMM*_WMASK] between DDR_CKE* signal assertion and
+        *    the first DDR3 mode register write operation.
+        *    LMC(0)_DIMM_CTL[DIMM*_WMASK] should be cleared to 0 if the
+        *    corresponding DIMM is not present.
+        *
+        *    If self-refresh exit is selected, LMC executes the required SRX
+        *    command followed by a refresh and ZQ calibration. Section 4.5
+        *    describes behavior of a REF + ZQCS.  LMC does not write the DDR3
+        *    mode registers as part of this sequence, and the mode register
+        *    parameters must match at self-refresh entry and exit times.
+        *
+        * 4. Read LMC(0)_SEQ_CTL and wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE]
+        *    to be set.
+        *
+        * 5. Read LMC(0)_CONFIG[INIT_STATUS] and confirm that all ranks have
+        *    been initialized.
+        */
+
+       union cvmx_lmcx_seq_ctl seq_ctl;
+       union cvmx_lmcx_config lmc_config;
+       int timeout;
+
+       lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+       lmc_config.s.rankmask = rank_mask;
+       lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
+
+       seq_ctl.u64 = 0;
+
+       seq_ctl.s.init_start = 1;
+       seq_ctl.s.seq_sel = sequence;
+
+       ddr_seq_print
+           ("Performing LMC sequence: rank_mask=0x%02x, sequence=0x%x, %s\n",
+            rank_mask, sequence, sequence_str[sequence]);
+
+       if (seq_ctl.s.seq_sel == 3)
+               debug("LMC%d: Exiting Self-refresh Rank_mask:%x\n", if_num,
+                     rank_mask);
+
+       lmc_wr(priv, CVMX_LMCX_SEQ_CTL(if_num), seq_ctl.u64);
+       lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num));
+
+       timeout = 100;
+       do {
+               udelay(100);    /* Wait a while */
+               seq_ctl.u64 = lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num));
+               if (--timeout == 0) {
+                       printf("Sequence %d timed out\n", sequence);
+                       break;
+               }
+       } while (seq_ctl.s.seq_complete != 1);
+
+       ddr_seq_print("           LMC sequence=%x: Completed.\n", sequence);
+}
+
+#define bdk_numa_get_address(n, p)     ((p) | ((u64)n) << CVMX_NODE_MEM_SHIFT)
+#define AREA_BASE_OFFSET               BIT_ULL(26)
+
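+/*
+ * Memory test used for tuning: fill two regions (64 MB apart) with an
+ * address-dependent pseudo-random pattern, XOR both with a fresh random
+ * value each burst, then compare against the predicted data. Returns a
+ * bitmask of byte lanes that saw errors; the accumulated bad bits are
+ * optionally passed back through xor_data.
+ */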
+static int test_dram_byte64(struct ddr_priv *priv, int lmc, u64 p,
+                           u64 bitmask, u64 *xor_data)
+{
+       u64 p1, p2, d1, d2;
+       u64 v, v1;
+       u64 p2offset = (1ULL << 26);    // offset to area 2
+       u64 datamask;
+       u64 xor;
+       u64 i, j, k;
+       u64 ii;
+       int errors = 0;
+       //u64 index;
+       u64 pattern1 = cvmx_rng_get_random64();
+       u64 pattern2 = 0;
+       u64 bad_bits[2] = { 0, 0 };
+       int kbitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18;
+       union cvmx_l2c_ctl l2c_ctl;
+       int burst;
+       int saved_dissblkdty;
+       int node = 0;
+
+       // Force full cacheline write-backs to boost traffic
+       l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL);
+       saved_dissblkdty = l2c_ctl.cn78xx.dissblkdty;
+       l2c_ctl.cn78xx.dissblkdty = 1;
+       l2c_wr(priv, CVMX_L2C_CTL, l2c_ctl.u64);
+
+       if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX))
+               kbitno = 18;
+
+       // Byte lanes may be clear in the mask to indicate no testing on that
+       // lane.
+       datamask = bitmask;
+
+       /*
+        * Add offset to both test regions to not clobber boot stuff
+        * when running from L2 for NAND boot.
+        */
+       p += AREA_BASE_OFFSET;  // make sure base is out of the way of boot
+
+       // final address must include LMC and node
+       p |= (lmc << 7);        /* Map address into proper interface */
+       p = bdk_numa_get_address(node, p);      /* Map to node */
+       p |= 1ull << 63;
+
+#define II_INC BIT_ULL(22)
+#define II_MAX BIT_ULL(22)
+#define K_INC  BIT_ULL(14)
+#define K_MAX  BIT_ULL(kbitno)
+#define J_INC  BIT_ULL(9)
+#define J_MAX  BIT_ULL(12)
+#define I_INC  BIT_ULL(3)
+#define I_MAX  BIT_ULL(7)
+
+       debug("N%d.LMC%d: %s: phys_addr=0x%llx/0x%llx (0x%llx)\n",
+             node, lmc, __func__, p, p + p2offset, 1ULL << kbitno);
+
+       // loops are ordered so that only a single 64-bit slot is written to
+       // each cacheline at one time, then the cachelines are forced out;
+       // this should maximize read/write traffic
+
+       // FIXME? extend the range of memory tested!!
+       for (ii = 0; ii < II_MAX; ii += II_INC) {
+               for (i = 0; i < I_MAX; i += I_INC) {
+                       for (k = 0; k < K_MAX; k += K_INC) {
+                               for (j = 0; j < J_MAX; j += J_INC) {
+                                       p1 = p + ii + k + j;
+                                       p2 = p1 + p2offset;
+
+                                       v = pattern1 * (p1 + i);
+                                       // write the same thing to both areas
+                                       v1 = v;
+
+                                       cvmx_write64_uint64(p1 + i, v);
+                                       cvmx_write64_uint64(p2 + i, v1);
+
+                                       CVMX_CACHE_WBIL2(p1, 0);
+                                       CVMX_CACHE_WBIL2(p2, 0);
+                               }
+                       }
+               }
+       }
+
+       CVMX_DCACHE_INVALIDATE;
+
+       debug("N%d.LMC%d: dram_tuning_mem_xor: done INIT loop\n", node, lmc);
+
+       /* Make a series of passes over the memory areas. */
+
+       for (burst = 0; burst < 1 /* was: dram_tune_use_bursts */ ; burst++) {
+               u64 this_pattern = cvmx_rng_get_random64();
+
+               pattern2 ^= this_pattern;
+
+               /*
+                * XOR the data with a random value, applying the change to both
+                * memory areas.
+                */
+
+               // FIXME? extend the range of memory tested!!
+               for (ii = 0; ii < II_MAX; ii += II_INC) {
+                       // FIXME: rearranged, did not make much difference?
+                       for (i = 0; i < I_MAX; i += I_INC) {
+                               for (k = 0; k < K_MAX; k += K_INC) {
+                                       for (j = 0; j < J_MAX; j += J_INC) {
+                                               p1 = p + ii + k + j;
+                                               p2 = p1 + p2offset;
+
+                                               v = cvmx_read64_uint64(p1 +
+                                                                     i) ^
+                                                   this_pattern;
+                                               v1 = cvmx_read64_uint64(p2 +
+                                                                      i) ^
+                                                   this_pattern;
+
+                                               cvmx_write64_uint64(p1 + i, v);
+                                               cvmx_write64_uint64(p2 + i, v1);
+
+                                               CVMX_CACHE_WBIL2(p1, 0);
+                                               CVMX_CACHE_WBIL2(p2, 0);
+                                       }
+                               }
+                       }
+               }
+
+               CVMX_DCACHE_INVALIDATE;
+
+               debug("N%d.LMC%d: dram_tuning_mem_xor: done MODIFY loop\n",
+                     node, lmc);
+
+               /*
+                * Look for differences in the areas. If there is a mismatch,
+                * reset both memory locations with the same pattern. Failing
+                * to do so means that on all subsequent passes the pair of
+                * locations remain out of sync giving spurious errors.
+                */
+
+               // FIXME: Change the loop order so that an entire cache line
+               //        is compared at one time. This is so that a read
+               //        error that occurs *anywhere* on the cacheline will
+               //        be caught, rather than comparing only 1 cacheline
+               //        slot at a time, where an error on a different
+               //        slot will be missed that time around
+               // Does the above make sense?
+
+               // FIXME? extend the range of memory tested!!
+               for (ii = 0; ii < II_MAX; ii += II_INC) {
+                       for (k = 0; k < K_MAX; k += K_INC) {
+                               for (j = 0; j < J_MAX; j += J_INC) {
+                                       p1 = p + ii + k + j;
+                                       p2 = p1 + p2offset;
+
+                                       // process entire cachelines in the
+                                       // innermost loop
+                                       for (i = 0; i < I_MAX; i += I_INC) {
+                                               int bybit = 1;
+                                               // start in byte lane 0
+                                               u64 bymsk = 0xffULL;
+
+                                               // FIXME: this should predict
+                                               // what we find...???
+                                               v = ((p1 + i) * pattern1) ^
+                                                       pattern2;
+                                               d1 = cvmx_read64_uint64(p1 + i);
+                                               d2 = cvmx_read64_uint64(p2 + i);
+
+                                               // union of error bits only in
+                                               // active byte lanes
+                                               xor = ((d1 ^ v) | (d2 ^ v)) &
+                                                       datamask;
+
+                                               if (!xor)
+                                                       continue;
+
+                                               // accumulate bad bits
+                                               bad_bits[0] |= xor;
+
+                                               while (xor != 0) {
+                                                       debug("ERROR(%03d): [0x%016llX] [0x%016llX]  expected 0x%016llX d1 %016llX d2 %016llX\n",
+                                                             burst, p1, p2, v,
+                                                             d1, d2);
+                                                       // error(s) in this lane
+                                                       if (xor & bymsk) {
+                                                               // set the byte
+                                                               // error bit
+                                                               errors |= bybit;
+                                                               // clear byte
+                                                               // lane in
+                                                               // error bits
+                                                               xor &= ~bymsk;
+                                                               // clear the
+                                                               // byte lane in
+                                                               // the mask
+                                                               datamask &= ~bymsk;
+#if EXIT_WHEN_ALL_LANES_HAVE_ERRORS
+                                                               // nothing
+                                                               // left to do
+                                                               if (datamask == 0) {
+                                                                       return errors;
+                                                               }
+#endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */
+                                                       }
+                                                       // move mask into
+                                                       // next byte lane
+                                                       bymsk <<= 8;
+                                                       // move bit into next
+                                                       // byte position
+                                                       bybit <<= 1;
+                                               }
+                                       }
+                                       CVMX_CACHE_WBIL2(p1, 0);
+                                       CVMX_CACHE_WBIL2(p2, 0);
+                               }
+                       }
+               }
+
+               debug("N%d.LMC%d: dram_tuning_mem_xor: done TEST loop\n",
+                     node, lmc);
+       }
+
+       if (xor_data) {         // send the bad bits back...
+               xor_data[0] = bad_bits[0];
+               xor_data[1] = bad_bits[1];      // let it be zeroed
+       }
+
+       // Restore original setting that could enable partial cacheline writes
+       l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL);
+       l2c_ctl.cn78xx.dissblkdty = saved_dissblkdty;
+       l2c_wr(priv, CVMX_L2C_CTL, l2c_ctl.u64);
+
+       return errors;
+}
+
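+/*
+ * Issue a DDR4 mode-register write to one rank via MR_MPR_CTL and the MRW
+ * sequence (0x8). Passing mr_wr_addr == -1 tells the hardware to use its
+ * default value for the selected register; mr_wr_bg1 drives the BG1 bit,
+ * used when targeting the inverted B-side of an RDIMM.
+ */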
+static void ddr4_mrw(struct ddr_priv *priv, int if_num, int rank,
+                    int mr_wr_addr, int mr_wr_sel, int mr_wr_bg1)
+{
+       union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
+
+       lmc_mr_mpr_ctl.u64 = 0;
+       lmc_mr_mpr_ctl.cn78xx.mr_wr_addr = (mr_wr_addr == -1) ? 0 : mr_wr_addr;
+       lmc_mr_mpr_ctl.cn78xx.mr_wr_sel = mr_wr_sel;
+       lmc_mr_mpr_ctl.cn78xx.mr_wr_rank = rank;
+       lmc_mr_mpr_ctl.cn78xx.mr_wr_use_default_value =
+               (mr_wr_addr == -1) ? 1 : 0;
+       lmc_mr_mpr_ctl.cn78xx.mr_wr_bg1 = mr_wr_bg1;
+       lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
+
+       /* Mode Register Write */
+       oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);
+}
+
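+/*
+ * Invert the address bits a DDR4 RDIMM register flips on its B-side
+ * outputs: 0x22bf8 covers A3-A9, A11, A13 and A17, so e.g.
+ * INV_A0_17(0) == 0x22bf8.
+ */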
+#define INV_A0_17(x)   ((x) ^ 0x22bf8)
+
+static void set_mpr_mode(struct ddr_priv *priv, int rank_mask,
+                        int if_num, int dimm_count, int mpr, int bg1)
+{
+       int rankx;
+
+       debug("All Ranks: Set mpr mode = %x %c-side\n",
+             mpr, (bg1 == 0) ? 'A' : 'B');
+
+       for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+               if (!(rank_mask & (1 << rankx)))
+                       continue;
+               if (bg1 == 0) {
+                       /* MR3 A-side */
+                       ddr4_mrw(priv, if_num, rankx, mpr << 2, 3, bg1);
+               } else {
+                       /* MR3 B-side */
+                       ddr4_mrw(priv, if_num, rankx, INV_A0_17(mpr << 2), ~3,
+                                bg1);
+               }
+       }
+}
+
+static void do_ddr4_mpr_read(struct ddr_priv *priv, int if_num,
+                            int rank, int page, int location)
+{
+       union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
+
+       lmc_mr_mpr_ctl.u64 = lmc_rd(priv, CVMX_LMCX_MR_MPR_CTL(if_num));
+       lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = 0;
+       lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = page; /* Page */
+       lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank;
+       lmc_mr_mpr_ctl.cn70xx.mpr_loc = location;
+       lmc_mr_mpr_ctl.cn70xx.mpr_wr = 0;       /* Read=0, Write=1 */
+       lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
+
+       /* MPR register access sequence */
+       oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9);
+
+       debug("LMC_MR_MPR_CTL                  : 0x%016llx\n",
+             lmc_mr_mpr_ctl.u64);
+       debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n",
+             lmc_mr_mpr_ctl.cn70xx.mr_wr_addr);
+       debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n",
+             lmc_mr_mpr_ctl.cn70xx.mr_wr_sel);
+       debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc   : 0x%02x\n",
+             lmc_mr_mpr_ctl.cn70xx.mpr_loc);
+       debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr    : 0x%02x\n",
+             lmc_mr_mpr_ctl.cn70xx.mpr_wr);
+}
+
+static int set_rdimm_mode(struct ddr_priv *priv, int if_num, int enable)
+{
+       union cvmx_lmcx_control lmc_control;
+       int save_rdimm_mode;
+
+       lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
+       save_rdimm_mode = lmc_control.s.rdimm_ena;
+       lmc_control.s.rdimm_ena = enable;
+       debug("Setting RDIMM_ENA = %x\n", enable);
+       lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), lmc_control.u64);
+
+       return save_rdimm_mode;
+}
+
+static void ddr4_mpr_read(struct ddr_priv *priv, int if_num, int rank,
+                         int page, int location, u64 *mpr_data)
+{
+       do_ddr4_mpr_read(priv, if_num, rank, page, location);
+
+       mpr_data[0] = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num));
+}
+
+/* Display MPR values for Page */
+static void display_mpr_page(struct ddr_priv *priv, int rank_mask,
+                            int if_num, int page)
+{
+       int rankx, location;
+       u64 mpr_data[3];
+
+       for (rankx = 0; rankx < 4; rankx++) {
+               if (!(rank_mask & (1 << rankx)))
+                       continue;
+
+               debug("N0.LMC%d.R%d: MPR Page %d loc [0:3]: ",
+                     if_num, rankx, page);
+               for (location = 0; location < 4; location++) {
+                       ddr4_mpr_read(priv, if_num, rankx, page, location,
+                                     mpr_data);
+                       debug("0x%02llx ", mpr_data[0] & 0xFF);
+               }
+               debug("\n");
+
+       }                       /* for (rankx = 0; rankx < 4; rankx++) */
+}
+
+static void ddr4_mpr_write(struct ddr_priv *priv, int if_num, int rank,
+                          int page, int location, u8 mpr_data)
+{
+       union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
+
+       lmc_mr_mpr_ctl.u64 = 0;
+       lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mpr_data;
+       lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = page; /* Page */
+       lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank;
+       lmc_mr_mpr_ctl.cn70xx.mpr_loc = location;
+       lmc_mr_mpr_ctl.cn70xx.mpr_wr = 1;       /* Read=0, Write=1 */
+       lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
+
+       /* MPR register access sequence */
+       oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9);
+
+       debug("LMC_MR_MPR_CTL                  : 0x%016llx\n",
+             lmc_mr_mpr_ctl.u64);
+       debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n",
+             lmc_mr_mpr_ctl.cn70xx.mr_wr_addr);
+       debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n",
+             lmc_mr_mpr_ctl.cn70xx.mr_wr_sel);
+       debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc   : 0x%02x\n",
+             lmc_mr_mpr_ctl.cn70xx.mpr_loc);
+       debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr    : 0x%02x\n",
+             lmc_mr_mpr_ctl.cn70xx.mpr_wr);
+}
+
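+/*
+ * Program VrefDQ training through MR6: A12:A10 carry tCCD_L, A7 enables
+ * training mode, A6 selects the range and A5:A0 the value. The MRW
+ * sequence is issued twice (capture with A7=1 is vendor specific), and
+ * MR_MPR_CTL is then left holding the same MR6 value with A7 cleared
+ * (training mode off).
+ */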
+static void set_vref(struct ddr_priv *priv, int if_num, int rank,
+                    int range, int value)
+{
+       union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
+       union cvmx_lmcx_modereg_params3 lmc_modereg_params3;
+       int mr_wr_addr = 0;
+
+       lmc_mr_mpr_ctl.u64 = 0;
+       lmc_modereg_params3.u64 = lmc_rd(priv,
+                                        CVMX_LMCX_MODEREG_PARAMS3(if_num));
+
+       /* A12:A10 tCCD_L */
+       mr_wr_addr |= lmc_modereg_params3.s.tccd_l << 10;
+       mr_wr_addr |= 1 << 7;   /* A7 1 = Enable(Training Mode) */
+       mr_wr_addr |= range << 6;       /* A6 vrefDQ Training Range */
+       mr_wr_addr |= value << 0;       /* A5:A0 vrefDQ Training Value */
+
+       lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr;
+       lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = 6;    /* Write MR6 */
+       lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank;
+       lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
+
+       /* 0x8 = Mode Register Write */
+       oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);
+
+       /*
+        * It is vendor specific whether vref_value is captured with A7=1.
+        * A subsequent MRS might be necessary.
+        */
+       oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);
+
+       mr_wr_addr &= ~(1 << 7);        /* A7 0 = Disable(Training Mode) */
+       lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr;
+       lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
+}
+
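+/*
+ * Enable or disable output inversion in the RDIMM register: bit 0 of
+ * register control word RC0 is rewritten on every DIMM, only RC0 is
+ * selected for writing (DIMM*_WMASK = 1), and the RCW-init sequence
+ * (0x7) is then run.
+ */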
+static void set_dram_output_inversion(struct ddr_priv *priv, int if_num,
+                                     int dimm_count, int rank_mask,
+                                     int inversion)
+{
+       union cvmx_lmcx_ddr4_dimm_ctl lmc_ddr4_dimm_ctl;
+       union cvmx_lmcx_dimmx_params lmc_dimmx_params;
+       union cvmx_lmcx_dimm_ctl lmc_dimm_ctl;
+       int dimm_no;
+
+       /* Don't touch extended register control words */
+       lmc_ddr4_dimm_ctl.u64 = 0;
+       lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), lmc_ddr4_dimm_ctl.u64);
+
+       debug("All DIMMs: Register Control Word          RC0 : %x\n",
+             (inversion & 1));
+
+       for (dimm_no = 0; dimm_no < dimm_count; ++dimm_no) {
+               lmc_dimmx_params.u64 =
+                   lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num));
+               lmc_dimmx_params.s.rc0 =
+                   (lmc_dimmx_params.s.rc0 & ~1) | (inversion & 1);
+
+               lmc_wr(priv,
+                      CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num),
+                      lmc_dimmx_params.u64);
+       }
+
+       /* LMC0_DIMM_CTL */
+       lmc_dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
+       lmc_dimm_ctl.s.dimm0_wmask = 0x1;
+       lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0x0001 : 0x0000;
+
+       debug("LMC DIMM_CTL                                  : 0x%016llx\n",
+             lmc_dimm_ctl.u64);
+       lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), lmc_dimm_ctl.u64);
+
+       oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);    /* Init RCW */
+}
+
+static void write_mpr_page0_pattern(struct ddr_priv *priv, int rank_mask,
+                                   int if_num, int dimm_count, int pattern,
+                                   int location_mask)
+{
+       int rankx;
+       int location;
+
+       for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+               if (!(rank_mask & (1 << rankx)))
+                       continue;
+               for (location = 0; location < 4; ++location) {
+                       if (!(location_mask & (1 << location)))
+                               continue;
+
+                       ddr4_mpr_write(priv, if_num, rankx,
+                                      /* page */ 0, /* location */ location,
+                                      pattern);
+               }
+       }
+}
+
+static void change_rdimm_mpr_pattern(struct ddr_priv *priv, int rank_mask,
+                                    int if_num, int dimm_count)
+{
+       int save_ref_zqcs_int;
+       union cvmx_lmcx_config lmc_config;
+
+       /*
+        * Okay, here is the latest sequence.  This should work for all
+        * chips and passes (78,88,73,etc).  This sequence should be run
+        * immediately after DRAM INIT.  The basic idea is to write the
+        * same pattern into each of the 4 MPR locations in the DRAM, so
+        * that the same value is returned when doing MPR reads regardless
+        * of the inversion state.  My advice is to put this into a
+        * function, change_rdimm_mpr_pattern or something like that, so
+        * that it can be called multiple times, as I think David wants a
+        * clock-like pattern for OFFSET training, but does not want a
+        * clock pattern for Bit-Deskew.  You should then be able to call
+        * this at any point in the init sequence (after DRAM init) to
+        * change the pattern to a new value.
+        * Mike
+        *
+        * A correction: PHY doesn't need any pattern during offset
+        * training, but needs clock like pattern for internal vref and
+        * bit-dskew training.  So for that reason, these steps below have
+        * to be conducted before those trainings to pre-condition
+        * the pattern.  David
+        *
+        * Note: Step 3, 4, 8 and 9 have to be done through RDIMM
+        * sequence. If you issue MRW sequence to do RCW write (in o78 pass
+        * 1 at least), LMC will still do two commands because
+        * CONTROL[RDIMM_ENA] is still set high. We don't want it to have
+        * any unintentional mode register write so it's best to do what
+        * Mike is doing here.
+        * Andrew
+        */
+
+       /* 1) Disable refresh (REF_ZQCS_INT = 0) */
+
+       debug("1) Disable refresh (REF_ZQCS_INT = 0)\n");
+
+       lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+       save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int;
+       lmc_config.cn78xx.ref_zqcs_int = 0;
+       lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
+
+       /*
+        * 2) Put all devices in MPR mode (Run MRW sequence (sequence=8)
+        * with MODEREG_PARAMS0[MPRLOC]=0,
+        * MODEREG_PARAMS0[MPR]=1, MR_MPR_CTL[MR_WR_SEL]=3, and
+        * MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1)
+        */
+
+       debug("2) Put all devices in MPR mode (Run MRW sequence (sequence=8)\n");
+
+       /* A-side */
+       set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 0);
+       /* B-side */
+       set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 1);
+
+       /*
+        * a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and set
+        * the value you would like directly into
+        * MR_MPR_CTL[MR_WR_ADDR]
+        */
+
+       /*
+        * 3) Disable RCD Parity (if previously enabled) - parity does not
+        * work if inversion disabled
+        */
+
+       debug("3) Disable RCD Parity\n");
+
+       /*
+        * 4) Disable Inversion in the RCD.
+        * a. I did (3&4) via the RDIMM sequence (seq_sel=7), but it
+        * may be easier to use the MRW sequence (seq_sel=8).  Just set
+        * MR_MPR_CTL[MR_WR_SEL]=7, MR_MPR_CTL[MR_WR_ADDR][3:0]=data,
+        * MR_MPR_CTL[MR_WR_ADDR][7:4]=RCD reg
+        */
+
+       debug("4) Disable Inversion in the RCD.\n");
+
+       set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 1);
+
+       /*
+        * 5) Disable CONTROL[RDIMM_ENA] so that MR sequence goes out
+        * non-inverted.
+        */
+
+       debug("5) Disable CONTROL[RDIMM_ENA]\n");
+
+       set_rdimm_mode(priv, if_num, 0);
+
+       /*
+        * 6) Write all 4 MPR registers with the desired pattern (have to
+        * do this for all enabled ranks)
+        * a. MR_MPR_CTL.MPR_WR=1, MR_MPR_CTL.MPR_LOC=0..3,
+        * MR_MPR_CTL.MR_WR_SEL=0, MR_MPR_CTL.MR_WR_ADDR[7:0]=pattern
+        */
+
+       debug("6) Write all 4 MPR page 0 Training Patterns\n");
+
+       write_mpr_page0_pattern(priv, rank_mask, if_num, dimm_count, 0x55, 0x8);
+
+       /* 7) Re-enable RDIMM_ENA */
+
+       debug("7) Re-enable RDIMM_ENA\n");
+
+       set_rdimm_mode(priv, if_num, 1);
+
+       /* 8) Re-enable RDIMM inversion */
+
+       debug("8) Re-enable RDIMM inversion\n");
+
+       set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 0);
+
+       /* 9) Re-enable RDIMM parity (if desired) */
+
+       debug("9) Re-enable RDIMM parity (if desired)\n");
+
+       /*
+        * 10) Take B-side devices out of MPR mode (Run MRW sequence
+        * (sequence=8) with MODEREG_PARAMS0[MPRLOC]=0,
+        * MODEREG_PARAMS0[MPR]=0, MR_MPR_CTL[MR_WR_SEL]=3, and
+        * MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1)
+        */
+
+       debug("10) Take B-side devices out of MPR mode\n");
+
+       set_mpr_mode(priv, rank_mask, if_num, dimm_count,
+                    /* mpr */ 0, /* bg1 */ 1);
+
+       /*
+        * a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and
+        * set the value you would like directly into MR_MPR_CTL[MR_WR_ADDR]
+        */
+
+       /* 11) Re-enable refresh (REF_ZQCS_INT=previous value) */
+
+       debug("11) Re-enable refresh (REF_ZQCS_INT=previous value)\n");
+
+       lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+       lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int;
+       lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
+}
+
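+/*
+ * Walk the byte lanes in the order given by seq[] (terminated by -1) and
+ * form a 4-bit code from each adjacent pair of 2-bit coarse write-level
+ * values; pairs whose bit is clear in the 0xBDE7 mask are presumably
+ * implausible jumps between neighbouring lanes and count as errors.
+ */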
+static int validate_hwl_seq(int *wl, int *seq)
+{
+       // sequence index, step through the sequence array
+       int seqx;
+       int bitnum;
+
+       seqx = 0;
+
+       while (seq[seqx + 1] >= 0) {    // stop on next seq entry == -1
+               // but now, check current versus next
+               bitnum = (wl[seq[seqx]] << 2) | wl[seq[seqx + 1]];
+               // magic validity number (see matrix above)
+               if (!((1 << bitnum) & 0xBDE7))
+                       return 1;
+               seqx++;
+       }
+
+       return 0;
+}
+
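+/*
+ * Sanity-check hardware write-leveling results: the coarse settings are
+ * expected to progress plausibly across physically adjacent byte lanes,
+ * with different lane orderings for UDIMMs and RDIMMs (the ECC lane sits
+ * in a different position). Returns the number of suspicious transitions.
+ */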
+static int validate_hw_wl_settings(int if_num,
+                                  union cvmx_lmcx_wlevel_rankx
+                                  *lmc_wlevel_rank, int is_rdimm, int ecc_ena)
+{
+       int wl[9], byte, errors;
+
+       // arrange the sequences so
+       // index 0 has byte 0, etc, ECC in middle
+       int useq[] = { 0, 1, 2, 3, 8, 4, 5, 6, 7, -1 };
+       // index 0 is ECC, then go down
+       int rseq1[] = { 8, 3, 2, 1, 0, -1 };
+       // index 0 has byte 4, then go up
+       int rseq2[] = { 4, 5, 6, 7, -1 };
+       // index 0 has byte 0, etc, no ECC
+       int useqno[] = { 0, 1, 2, 3, 4, 5, 6, 7, -1 };
+       // index 0 is byte 3, then go down, no ECC
+       int rseq1no[] = { 3, 2, 1, 0, -1 };
+
+       // in the CSR, bytes 0-7 are always data, byte 8 is ECC
+       for (byte = 0; byte < (8 + ecc_ena); byte++) {
+               // preprocess :-)
+               wl[byte] = (get_wl_rank(lmc_wlevel_rank, byte) >>
+                           1) & 3;
+       }
+
+       errors = 0;
+       if (is_rdimm) {         // RDIMM order
+               errors = validate_hwl_seq(wl, (ecc_ena) ? rseq1 : rseq1no);
+               errors += validate_hwl_seq(wl, rseq2);
+       } else {                // UDIMM order
+               errors = validate_hwl_seq(wl, (ecc_ena) ? useq : useqno);
+       }
+
+       return errors;
+}
+
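+/*
+ * Helpers to extract/insert a 3-bit per-byte value packed into a 64-bit
+ * CSR image: the low two bits live at bit position (x * 12 + 5) of the
+ * byte's 12-bit slot and the third bit at bit (51 + x).
+ */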
+static unsigned int extr_wr(u64 u, int x)
+{
+       return (unsigned int)(((u >> (x * 12 + 5)) & 0x3ULL) |
+                             ((u >> (51 + x - 2)) & 0x4ULL));
+}
+
+static void insrt_wr(u64 *up, int x, int v)
+{
+       u64 u = *up;
+
+       u &= ~(((0x3ULL) << (x * 12 + 5)) | ((0x1ULL) << (51 + x)));
+       *up = (u | ((v & 0x3ULL) << (x * 12 + 5)) |
+              ((v & 0x4ULL) << (51 + x - 2)));
+}
+
+/* Read out Deskew Settings for DDR */
+
+struct deskew_bytes {
+       u16 bits[8];
+};
+
+struct deskew_data {
+       struct deskew_bytes bytes[9];
+};
+
+struct dac_data {
+       int bytes[9];
+};
+
+// T88 pass 1, skip 4=DAC
+static const u8 dsk_bit_seq_p1[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
+// T88 Pass 2, skip 4=DAC and 5=DBI
+static const u8 dsk_bit_seq_p2[8] = { 0, 1, 2, 3, 6, 7, 8, 9 };
+
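+/*
+ * Read back the per-bit deskew settings: for each byte lane and DQ bit,
+ * select it via PHY_CTL (the bit-select sequence skips the DAC slot, and
+ * on newer parts also the DBI slot), start a debug read, poll for
+ * completion and store the 10-bit result (tap count in bits 9:3, flag
+ * bits in 2:0).
+ */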
+static void get_deskew_settings(struct ddr_priv *priv, int if_num,
+                               struct deskew_data *dskdat)
+{
+       union cvmx_lmcx_phy_ctl phy_ctl;
+       union cvmx_lmcx_config lmc_config;
+       int bit_index;
+       int byte_lane, byte_limit;
+       // NOTE: these are for pass 2.x
+       int is_o78p2 = !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X);
+       const u8 *bit_seq = (is_o78p2) ? dsk_bit_seq_p2 : dsk_bit_seq_p1;
+
+       lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+       byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena;
+
+       memset(dskdat, 0, sizeof(*dskdat));
+
+       phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
+       phy_ctl.s.dsk_dbg_clk_scaler = 3;
+
+       for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
+               phy_ctl.s.dsk_dbg_byte_sel = byte_lane; // set byte lane
+
+               for (bit_index = 0; bit_index < 8; ++bit_index) {
+                       // set bit number and start read sequence
+                       phy_ctl.s.dsk_dbg_bit_sel = bit_seq[bit_index];
+                       phy_ctl.s.dsk_dbg_rd_start = 1;
+                       lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
+
+                       // poll for read sequence to complete
+                       do {
+                               phy_ctl.u64 =
+                                       lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
+                       } while (phy_ctl.s.dsk_dbg_rd_complete != 1);
+
+                       // record the data
+                       dskdat->bytes[byte_lane].bits[bit_index] =
+                               phy_ctl.s.dsk_dbg_rd_data & 0x3ff;
+               }
+       }
+}
+
+static void display_deskew_settings(struct ddr_priv *priv, int if_num,
+                                   struct deskew_data *dskdat,
+                                   int print_enable)
+{
+       int byte_lane;
+       int bit_num;
+       u16 flags, deskew;
+       union cvmx_lmcx_config lmc_config;
+       int byte_limit;
+       const char *fc = " ?-=+*#&";
+
+       lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+       byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
+
+       if (print_enable) {
+               debug("N0.LMC%d: Deskew Data:              Bit =>      :",
+                     if_num);
+               for (bit_num = 7; bit_num >= 0; --bit_num)
+                       debug(" %3d  ", bit_num);
+               debug("\n");
+       }
+
+       for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
+               if (print_enable)
+                       debug("N0.LMC%d: Bit Deskew Byte %d %s               :",
+                             if_num, byte_lane,
+                             (print_enable >= 3) ? "FINAL" : "     ");
+
+               for (bit_num = 7; bit_num >= 0; --bit_num) {
+                       flags = dskdat->bytes[byte_lane].bits[bit_num] & 7;
+                       deskew = dskdat->bytes[byte_lane].bits[bit_num] >> 3;
+
+                       if (print_enable)
+                               debug(" %3d %c", deskew, fc[flags ^ 1]);
+
+               }               /* for (bit_num = 7; bit_num >= 0; --bit_num) */
+
+               if (print_enable)
+                       debug("\n");
+       }
+}
+
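+/*
+ * Load explicit deskew values into the PHY: each byte lane's eight 7-bit
+ * tap counts are packed into GENERAL_PURPOSE0 (7 bits per DQ), the load
+ * is triggered through PHY_CTL with dsk_dbg_wr_mode set, and once all
+ * lanes are done dsk_dbg_overwrt_ena is set so the PHY uses the new
+ * settings.
+ */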
+static void override_deskew_settings(struct ddr_priv *priv, int if_num,
+                                    struct deskew_data *dskdat)
+{
+       union cvmx_lmcx_phy_ctl phy_ctl;
+       union cvmx_lmcx_config lmc_config;
+
+       int bit, byte_lane, byte_limit;
+       u64 csr_data;
+
+       lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+       byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
+
+       phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
+
+       phy_ctl.s.phy_reset = 0;
+       phy_ctl.s.dsk_dbg_num_bits_sel = 1;
+       phy_ctl.s.dsk_dbg_offset = 0;
+       phy_ctl.s.dsk_dbg_clk_scaler = 3;
+
+       phy_ctl.s.dsk_dbg_wr_mode = 1;
+       phy_ctl.s.dsk_dbg_load_dis = 0;
+       phy_ctl.s.dsk_dbg_overwrt_ena = 0;
+
+       phy_ctl.s.phy_dsk_reset = 0;
+
+       lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
+       lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
+
+       for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
+               csr_data = 0;
+               // FIXME: can we ignore DBI?
+               for (bit = 0; bit < 8; ++bit) {
+                       // fetch input and adjust
+                       u64 bits = (dskdat->bytes[byte_lane].bits[bit] >> 3) &
+                               0x7F;
+
+                       /*
+                        * lmc_general_purpose0.data[6:0]    // DQ0
+                        * lmc_general_purpose0.data[13:7]   // DQ1
+                        * lmc_general_purpose0.data[20:14]  // DQ2
+                        * lmc_general_purpose0.data[27:21]  // DQ3
+                        * lmc_general_purpose0.data[34:28]  // DQ4
+                        * lmc_general_purpose0.data[41:35]  // DQ5
+                        * lmc_general_purpose0.data[48:42]  // DQ6
+                        * lmc_general_purpose0.data[55:49]  // DQ7
+                        * lmc_general_purpose0.data[62:56]  // DBI
+                        */
+                       csr_data |= (bits << (7 * bit));
+
+               } /* for (bit = 0; bit < 8; ++bit) */
+
+               // update GP0 with the bit data for this byte lane
+               lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num), csr_data);
+               lmc_rd(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num));
+
+               // start the deskew load sequence
+               phy_ctl.s.dsk_dbg_byte_sel = byte_lane;
+               phy_ctl.s.dsk_dbg_rd_start = 1;
+               lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
+
+               // poll for read sequence to complete
+               do {
+                       udelay(100);
+                       phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
+               } while (phy_ctl.s.dsk_dbg_rd_complete != 1);
+       }
+
+       // tell phy to use the new settings
+       phy_ctl.s.dsk_dbg_overwrt_ena = 1;
+       phy_ctl.s.dsk_dbg_rd_start = 0;
+       lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
+
+       phy_ctl.s.dsk_dbg_wr_mode = 0;
+       lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
+}
+
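+/*
+ * Combine per-rank VREF DAC results into one all-rank value per byte
+ * lane: the per-rank settings are averaged, lanes where two ranks differ
+ * by more than 19 steps are reported as suspect, and the averaged values
+ * are written back with load_dac_override().
+ */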
+static void process_by_rank_dac(struct ddr_priv *priv, int if_num,
+                               int rank_mask, struct dac_data *dacdat)
+{
+       union cvmx_lmcx_config lmc_config;
+       int rankx, byte_lane;
+       int byte_limit;
+       int rank_count;
+       struct dac_data dacsum;
+       int lane_probs;
+
+       lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+       byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
+
+       memset((void *)&dacsum, 0, sizeof(dacsum));
+       rank_count = 0;
+       lane_probs = 0;
+
+       for (rankx = 0; rankx < 4; rankx++) {
+               if (!(rank_mask & (1 << rankx)))
+                       continue;
+               rank_count++;
+
+               display_dac_dbi_settings(if_num, /*dac */ 1,
+                                        lmc_config.s.ecc_ena,
+                                        &dacdat[rankx].bytes[0],
+                                        "By-Ranks VREF");
+               // sum
+               for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
+                       if (rank_count == 2) {
+                               int ranks_diff =
+                                   abs((dacsum.bytes[byte_lane] -
+                                        dacdat[rankx].bytes[byte_lane]));
+
+                               // FIXME: is 19 a good number?
+                               if (ranks_diff > 19)
+                                       lane_probs |= (1 << byte_lane);
+                       }
+                       dacsum.bytes[byte_lane] +=
+                           dacdat[rankx].bytes[byte_lane];
+               }
+       }
+
+       // average
+       for (byte_lane = 0; byte_lane < byte_limit; byte_lane++)
+               dacsum.bytes[byte_lane] /= rank_count;  // FIXME: nint?
+
+       display_dac_dbi_settings(if_num, /*dac */ 1, lmc_config.s.ecc_ena,
+                                &dacsum.bytes[0], "All-Rank VREF");
+
+       if (lane_probs) {
+               debug("N0.LMC%d: All-Rank VREF DAC Problem Bytelane(s): 0x%03x\n",
+                     if_num, lane_probs);
+       }
+
+       // finally, write the averaged DAC values
+       for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
+               load_dac_override(priv, if_num, dacsum.bytes[byte_lane],
+                                 byte_lane);
+       }
+}
+
+static void process_by_rank_dsk(struct ddr_priv *priv, int if_num,
+                               int rank_mask, struct deskew_data *dskdat)
+{
+       union cvmx_lmcx_config lmc_config;
+       int rankx, lane, bit;
+       int byte_limit;
+       struct deskew_data dsksum, dskcnt;
+       u16 deskew;
+
+       lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+       byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
+
+       memset((void *)&dsksum, 0, sizeof(dsksum));
+       memset((void *)&dskcnt, 0, sizeof(dskcnt));
+
+       for (rankx = 0; rankx < 4; rankx++) {
+               if (!(rank_mask & (1 << rankx)))
+                       continue;
+
+               // sum ranks
+               for (lane = 0; lane < byte_limit; lane++) {
+                       for (bit = 0; bit < 8; ++bit) {
+                               deskew = dskdat[rankx].bytes[lane].bits[bit];
+                               // if flags indicate sat hi or lo, skip it
+                               if (deskew & 6)
+                                       continue;
+
+                               // clear flags
+                               dsksum.bytes[lane].bits[bit] +=
+                                       deskew & ~7;
+                               // count entries
+                               dskcnt.bytes[lane].bits[bit] += 1;
+                       }
+               }
+       }
+
+       // average ranks
+       for (lane = 0; lane < byte_limit; lane++) {
+               for (bit = 0; bit < 8; ++bit) {
+                       int div = dskcnt.bytes[lane].bits[bit];
+
+                       if (div > 0) {
+                               dsksum.bytes[lane].bits[bit] /= div;
+                               // clear flags
+                               dsksum.bytes[lane].bits[bit] &= ~7;
+                               // set LOCK
+                               dsksum.bytes[lane].bits[bit] |= 1;
+                       } else {
+                               // FIXME? use reset value?
+                               dsksum.bytes[lane].bits[bit] =
+                                       (64 << 3) | 1;
+                       }
+               }
+       }
+
+       // TME for FINAL version
+       display_deskew_settings(priv, if_num, &dsksum, /*VBL_TME */ 3);
+
+       // finally, write the averaged DESKEW values
+       override_deskew_settings(priv, if_num, &dsksum);
+}
+
+struct deskew_counts {
+       int saturated;          // number saturated
+       int unlocked;           // number unlocked
+       int nibrng_errs;        // nibble range errors
+       int nibunl_errs;        // nibble unlocked errors
+       int bitval_errs;        // bit value errors
+};
+
+#define MIN_BITVAL  17
+#define MAX_BITVAL 110
+
+static void validate_deskew_training(struct ddr_priv *priv, int rank_mask,
+                                    int if_num, struct deskew_counts *counts,
+                                    int print_flags)
+{
+       int byte_lane, bit_index, nib_num;
+       int nibrng_errs, nibunl_errs, bitval_errs;
+       union cvmx_lmcx_config lmc_config;
+       s16 nib_min[2], nib_max[2], nib_unl[2];
+       int byte_limit;
+       int print_enable = print_flags & 1;
+       struct deskew_data dskdat;
+       s16 flags, deskew;
+       const char *fc = " ?-=+*#&";
+       int bit_last;
+
+       lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+       byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena;
+
+       memset(counts, 0, sizeof(struct deskew_counts));
+
+       get_deskew_settings(priv, if_num, &dskdat);
+
+       if (print_enable) {
+               debug("N0.LMC%d: Deskew Settings:          Bit =>      :",
+                     if_num);
+               for (bit_index = 7; bit_index >= 0; --bit_index)
+                       debug(" %3d  ", bit_index);
+               debug("\n");
+       }
+
+       for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
+               if (print_enable)
+                       debug("N0.LMC%d: Bit Deskew Byte %d %s               :",
+                             if_num, byte_lane,
+                             (print_flags & 2) ? "FINAL" : "     ");
+
+               nib_min[0] = 127;
+               nib_min[1] = 127;
+               nib_max[0] = 0;
+               nib_max[1] = 0;
+               nib_unl[0] = 0;
+               nib_unl[1] = 0;
+
+               if (lmc_config.s.mode32b == 1 && byte_lane == 4) {
+                       bit_last = 3;
+                       if (print_enable)
+                               debug("                        ");
+               } else {
+                       bit_last = 7;
+               }
+
+               for (bit_index = bit_last; bit_index >= 0; --bit_index) {
+                       nib_num = (bit_index > 3) ? 1 : 0;
+
+                       flags = dskdat.bytes[byte_lane].bits[bit_index] & 7;
+                       deskew = dskdat.bytes[byte_lane].bits[bit_index] >> 3;
+
+                       counts->saturated += !!(flags & 6);
+
+                       // Do range calc even when locked; it could happen
+                       // that a bit is still unlocked after final retry,
+                       // and we want to have an external retry if a RANGE
+                       // error is present at exit...
+                       nib_min[nib_num] = min(nib_min[nib_num], deskew);
+                       nib_max[nib_num] = max(nib_max[nib_num], deskew);
+
+                       if (!(flags & 1)) {     // only when not locked
+                               counts->unlocked += 1;
+                               nib_unl[nib_num] += 1;
+                       }
+
+                       if (print_enable)
+                               debug(" %3d %c", deskew, fc[flags ^ 1]);
+               }
+
+               /*
+                * Now look for nibble errors
+                *
+                * For bit 55, it looks like a bit deskew problem. When the
+                * upper nibble of byte 6 needs to go to saturation, bit 7
+                * of byte 6 locks prematurely at 64. For DIMMs with raw
+                * card A and B, can we reset the deskew training when we
+                * encounter this case? The reset criteria should be looking
+                * at one nibble at a time for raw card A and B; if the
+                * bit-deskew setting within a nibble is different by > 33,
+                * we'll issue a reset to the bit deskew training.
+                *
+                * LMC0 Bit Deskew Byte(6): 64 0 - 0 - 0 - 26 61 35 64
+                */
+               // upper nibble range, then lower nibble range
+               nibrng_errs = ((nib_max[1] - nib_min[1]) > 33) ? 1 : 0;
+               nibrng_errs |= ((nib_max[0] - nib_min[0]) > 33) ? 1 : 0;
+
+               // check for nibble all unlocked
+               nibunl_errs = ((nib_unl[0] == 4) || (nib_unl[1] == 4)) ? 1 : 0;
+
+               // check for bit value errors, i.e. < 17 or > 110
+               // FIXME? assume max always > MIN_BITVAL and min < MAX_BITVAL
+               bitval_errs = ((nib_max[1] > MAX_BITVAL) ||
+                              (nib_max[0] > MAX_BITVAL)) ? 1 : 0;
+               bitval_errs |= ((nib_min[1] < MIN_BITVAL) ||
+                               (nib_min[0] < MIN_BITVAL)) ? 1 : 0;
+
+               if ((nibrng_errs != 0 || nibunl_errs != 0 ||
+                    bitval_errs != 0) && print_enable) {
+                       debug(" %c%c%c",
+                             (nibrng_errs) ? 'R' : ' ',
+                             (nibunl_errs) ? 'U' : ' ',
+                             (bitval_errs) ? 'V' : ' ');
+               }
+
+               if (print_enable)
+                       debug("\n");
+
+               counts->nibrng_errs |= (nibrng_errs << byte_lane);
+               counts->nibunl_errs |= (nibunl_errs << byte_lane);
+               counts->bitval_errs |= (bitval_errs << byte_lane);
+       }
+}
+
+static unsigned short load_dac_override(struct ddr_priv *priv, int if_num,
+                                       int dac_value, int byte)
+{
+       union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
+       // single byte lanes are selected as byte + 1; 0x0A selects ALL byte lanes
+       int bytex = (byte == 0x0A) ? byte : byte + 1;
+
+       ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
+
+       SET_DDR_DLL_CTL3(byte_sel, bytex);
+       SET_DDR_DLL_CTL3(offset, dac_value >> 1);
+
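+       /*
+        * Load the override via the DLL_CTL3 bit_select handshake used in
+        * this file: a no-op, then "vref bypass setting load", then "vref
+        * bypass on", then a final no-op, followed by a read-back to flush
+        * the writes.
+        */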
+       ddr_dll_ctl3.cn73xx.bit_select = 0x9;   /* No-op */
+       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
+
+       ddr_dll_ctl3.cn73xx.bit_select = 0xC;   /* vref bypass setting load */
+       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
+
+       ddr_dll_ctl3.cn73xx.bit_select = 0xD;   /* vref bypass on. */
+       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
+
+       ddr_dll_ctl3.cn73xx.bit_select = 0x9;   /* No-op */
+       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
+
+       lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));       // flush writes
+
+       return (unsigned short)GET_DDR_DLL_CTL3(offset);
+}
+
+// arg dac_or_dbi is 1 for DAC, 0 for DBI
+// returns 9 entries (bytelanes 0 through 8) in settings[]
+// returns 0 if OK, -1 if a problem
+static int read_dac_dbi_settings(struct ddr_priv *priv, int if_num,
+                                int dac_or_dbi, int *settings)
+{
+       union cvmx_lmcx_phy_ctl phy_ctl;
+       int byte_lane, bit_num;
+       int deskew;
+       int dac_value;
+       int new_deskew_layout = 0;
+
+       new_deskew_layout = octeon_is_cpuid(OCTEON_CN73XX) ||
+               octeon_is_cpuid(OCTEON_CNF75XX);
+       new_deskew_layout |= (octeon_is_cpuid(OCTEON_CN78XX) &&
+                             !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X));
+
+       phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
+       phy_ctl.s.dsk_dbg_clk_scaler = 3;
+       lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
+
+       bit_num = (dac_or_dbi) ? 4 : 5;
+       // DBI not available
+       if (bit_num == 5 && !new_deskew_layout)
+               return -1;
+
+       // FIXME: always assume ECC is available
+       for (byte_lane = 8; byte_lane >= 0; --byte_lane) {
+               //set byte lane and bit to read
+               phy_ctl.s.dsk_dbg_bit_sel = bit_num;
+               phy_ctl.s.dsk_dbg_byte_sel = byte_lane;
+               lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
+
+               //start read sequence
+               phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
+               phy_ctl.s.dsk_dbg_rd_start = 1;
+               lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
+
+               //poll for read sequence to complete
+               do {
+                       phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
+               } while (phy_ctl.s.dsk_dbg_rd_complete != 1);
+
+               // keep the flag bits where they are for DBI
+               deskew = phy_ctl.s.dsk_dbg_rd_data; /* >> 3 */
+               dac_value = phy_ctl.s.dsk_dbg_rd_data & 0xff;
+
+               settings[byte_lane] = (dac_or_dbi) ? dac_value : deskew;
+       }
+
+       return 0;
+}
+
+// print out the DBI settings array
+// arg dac_or_dbi is 1 for DAC, 0 for DBI
+static void display_dac_dbi_settings(int lmc, int dac_or_dbi,
+                                    int ecc_ena, int *settings, char *title)
+{
+       int byte;
+       int flags;
+       int deskew;
+       const char *fc = " ?-=+*#&";
+
+       debug("N0.LMC%d: %s %s Settings %d:0 :",
+             lmc, title, (dac_or_dbi) ? "DAC" : "DBI", 7 + ecc_ena);
+       // FIXME: what about 32-bit mode?
+       for (byte = (7 + ecc_ena); byte >= 0; --byte) {
+               if (dac_or_dbi) {       // DAC
+                       flags = 1;      // say it's locked to get a blank flag
+                       deskew = settings[byte] & 0xff;
+               } else {        // DBI
+                       flags = settings[byte] & 7;
+                       deskew = (settings[byte] >> 3) & 0x7f;
+               }
+               debug(" %3d %c", deskew, fc[flags ^ 1]);
+       }
+       debug("\n");
+}
+
+// Find a HWL majority
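+// Returns the majority (most frequent) index doubled, i.e. 0, 2, 4 or 6
+// (or -2 if all counts were zero); also reports the winning index (mx),
+// a bitmask of the indices seen (mc), the highest count (xc) and the
+// number of distinct values seen (cc).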
+static int find_wl_majority(struct wlevel_bitcnt *bc, int *mx, int *mc,
+                           int *xc, int *cc)
+{
+       int ix, ic;
+
+       *mx = -1;
+       *mc = 0;
+       *xc = 0;
+       *cc = 0;
+
+       for (ix = 0; ix < 4; ix++) {
+               ic = bc->bitcnt[ix];
+
+               // make a bitmask of the ones with a count
+               if (ic > 0) {
+                       *mc |= (1 << ix);
+                       *cc += 1;       // count how many had non-zero counts
+               }
+
+               // find the majority
+               if (ic > *xc) { // new max?
+                       *xc = ic;       // yes
+                       *mx = ix;       // set its index
+               }
+       }
+
+       return (*mx << 1);
+}
+
+// Evaluate the DAC settings array
+static int evaluate_dac_settings(int if_64b, int ecc_ena, int *settings)
+{
+       int byte, lane, dac, comp;
+       int last = (if_64b) ? 7 : 3;
+
+       // FIXME: change the check...???
+       // this looks only for sets of DAC values whose max/min differ by a lot
+       // let any EVEN go so long as it is within range...
+       for (byte = (last + ecc_ena); byte >= 0; --byte) {
+               dac = settings[byte] & 0xff;
+
+               for (lane = (last + ecc_ena); lane >= 0; --lane) {
+                       comp = settings[lane] & 0xff;
+                       if (abs((dac - comp)) > 25)
+                               return 1;
+               }
+       }
+
+       return 0;
+}
+
+static void perform_offset_training(struct ddr_priv *priv, int rank_mask,
+                                   int if_num)
+{
+       union cvmx_lmcx_phy_ctl lmc_phy_ctl;
+       u64 orig_phy_ctl;
+       const char *s;
+
+       /*
+        * 4.8.6 LMC Offset Training
+        *
+        * LMC requires input-receiver offset training.
+        *
+        * 1. Write LMC(0)_PHY_CTL[DAC_ON] = 1
+        */
+       lmc_phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
+       orig_phy_ctl = lmc_phy_ctl.u64;
+       lmc_phy_ctl.s.dac_on = 1;
+
+       // allow full CSR override
+       s = lookup_env_ull(priv, "ddr_phy_ctl");
+       if (s)
+               lmc_phy_ctl.u64 = strtoull(s, NULL, 0);
+
+       // do not print or write if CSR does not change...
+       if (lmc_phy_ctl.u64 != orig_phy_ctl) {
+               debug("PHY_CTL                                       : 0x%016llx\n",
+                     lmc_phy_ctl.u64);
+               lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), lmc_phy_ctl.u64);
+       }
+
+       /*
+        * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0B and
+        *    LMC(0)_SEQ_CTL[INIT_START] = 1.
+        *
+        * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
+        */
+       /* Start Offset training sequence */
+       oct3_ddr3_seq(priv, rank_mask, if_num, 0x0B);
+}
+
+static void perform_internal_vref_training(struct ddr_priv *priv,
+                                          int rank_mask, int if_num)
+{
+       union cvmx_lmcx_ext_config ext_config;
+       union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
+
+       // First, make sure all byte-lanes are out of VREF bypass mode
+       ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
+
+       ddr_dll_ctl3.cn78xx.byte_sel = 0x0A;    /* all byte-lanes */
+       ddr_dll_ctl3.cn78xx.bit_select = 0x09;  /* No-op */
+       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
+
+       ddr_dll_ctl3.cn78xx.bit_select = 0x0E;  /* vref bypass off. */
+       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
+
+       ddr_dll_ctl3.cn78xx.bit_select = 0x09;  /* No-op */
+       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
+
+       /*
+        * 4.8.7 LMC Internal vref Training
+        *
+        * LMC requires input-reference-voltage training.
+        *
+        * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 0.
+        */
+       ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
+       ext_config.s.vrefint_seq_deskew = 0;
+
+       ddr_seq_print("Performing LMC sequence: vrefint_seq_deskew = %d\n",
+                     ext_config.s.vrefint_seq_deskew);
+
+       lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64);
+
+       /*
+        * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0a and
+        *    LMC(0)_SEQ_CTL[INIT_START] = 1.
+        *
+        * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
+        */
+       /* Start LMC Internal vref Training */
+       oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);
+}
+
+#define dbg_avg(format, ...)   // debug(format, ##__VA_ARGS__)
+
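+/*
+ * Average num_samples entries after dropping one minimum and one maximum
+ * sample, then force the result to be odd: use the truncated average if it
+ * is odd, otherwise the rounded average if that is odd, otherwise the
+ * truncated average plus one.
+ */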
+static int process_samples_average(s16 *bytes, int num_samples,
+                                  int lmc, int lane_no)
+{
+       int i, sadj, sum = 0, ret, asum, trunc;
+       s16 smin = 32767, smax = -32768;
+       int nmin, nmax;
+       //int rng;
+
+       dbg_avg("DBG_AVG%d.%d: ", lmc, lane_no);
+
+       for (i = 0; i < num_samples; i++) {
+               sum += bytes[i];
+               if (bytes[i] < smin)
+                       smin = bytes[i];
+               if (bytes[i] > smax)
+                       smax = bytes[i];
+               dbg_avg(" %3d", bytes[i]);
+       }
+
+       nmin = 0;
+       nmax = 0;
+       for (i = 0; i < num_samples; i++) {
+               if (bytes[i] == smin)
+                       nmin += 1;
+               if (bytes[i] == smax)
+                       nmax += 1;
+       }
+       dbg_avg(" (min=%3d/%d, max=%3d/%d, range=%2d, samples=%2d)",
+               smin, nmin, smax, nmax, smax - smin, num_samples);
+
+       asum = sum - smin - smax;
+
+       sadj = divide_nint(asum * 10, (num_samples - 2));
+
+       trunc = asum / (num_samples - 2);
+
+       dbg_avg(" [%3d.%d, %3d]", sadj / 10, sadj % 10, trunc);
+
+       sadj = divide_nint(sadj, 10);
+       if (trunc & 1)
+               ret = trunc;
+       else if (sadj & 1)
+               ret = sadj;
+       else
+               ret = trunc + 1;
+
+       dbg_avg(" -> %3d\n", ret);
+
+       return ret;
+}
+
+#define DEFAULT_SAT_RETRY_LIMIT    11  // 1 + 10 retries
+
+#define default_lock_retry_limit   20  // 20 retries
+#define deskew_validation_delay    10000       // 10 millisecs
+
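+/*
+ * Overall flow, as implemented below: reset the deskew logic and run the
+ * training sequence, then re-run it (without a reset) while any bit remains
+ * unlocked, up to lock_retries_limit attempts.  If bits remain saturated
+ * (and the DIMM is not raw card A/B), restart the whole procedure up to
+ * sat_retries_limit times.  Nibble-range, nibble-unlock and (optionally)
+ * bit-value errors, or remaining saturation, cause a fault return, giving
+ * the caller a chance to retry via internal VREF training.
+ */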
+static int perform_deskew_training(struct ddr_priv *priv, int rank_mask,
+                                  int if_num, int spd_rawcard_aorb)
+{
+       int unsaturated, locked;
+       int sat_retries, sat_retries_limit;
+       int lock_retries, lock_retries_total, lock_retries_limit;
+       int print_first;
+       int print_them_all;
+       struct deskew_counts dsk_counts;
+       union cvmx_lmcx_phy_ctl phy_ctl;
+       char *s;
+       int has_no_sat = octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
+               octeon_is_cpuid(OCTEON_CNF75XX);
+       int disable_bitval_retries = 1; // default to disabled
+
+       debug("N0.LMC%d: Performing Deskew Training.\n", if_num);
+
+       sat_retries = 0;
+       sat_retries_limit = (has_no_sat) ? 5 : DEFAULT_SAT_RETRY_LIMIT;
+
+       lock_retries_total = 0;
+       unsaturated = 0;
+       print_first = 1;        // print the first one
+       // set to true for printing all normal deskew attempts
+       print_them_all = 0;
+
+       // provide override for bitval_errs causing internal VREF retries
+       s = env_get("ddr_disable_bitval_retries");
+       if (s)
+               disable_bitval_retries = !!simple_strtoul(s, NULL, 0);
+
+       lock_retries_limit = default_lock_retry_limit;
+       if ((octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) ||
+           (octeon_is_cpuid(OCTEON_CN73XX)) ||
+           (octeon_is_cpuid(OCTEON_CNF75XX)))
+               lock_retries_limit *= 2;        // give new chips twice as many
+
+       do {                    /* while (sat_retries < sat_retries_limit) */
+               /*
+                * 4.8.8 LMC Deskew Training
+                *
+                * LMC requires input-read-data deskew training.
+                *
+                * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 1.
+                */
+
+               union cvmx_lmcx_ext_config ext_config;
+
+               ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
+               ext_config.s.vrefint_seq_deskew = 1;
+
+               ddr_seq_print
+                   ("Performing LMC sequence: vrefint_seq_deskew = %d\n",
+                    ext_config.s.vrefint_seq_deskew);
+
+               lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64);
+
+               /*
+                * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0A and
+                *    LMC(0)_SEQ_CTL[INIT_START] = 1.
+                *
+                * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
+                */
+
+               phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
+               phy_ctl.s.phy_dsk_reset = 1;    /* RESET Deskew sequence */
+               lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
+
+               /* LMC Deskew Training */
+               oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);
+
+               lock_retries = 0;
+
+perform_deskew_training:
+
+               phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
+               phy_ctl.s.phy_dsk_reset = 0;    /* Normal Deskew sequence */
+               lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
+
+               /* LMC Deskew Training */
+               oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);
+
+               // Moved this from validate_deskew_training
+               /* Allow deskew results to stabilize before evaluating them. */
+               udelay(deskew_validation_delay);
+
+               // Now go look at lock and saturation status...
+               validate_deskew_training(priv, rank_mask, if_num, &dsk_counts,
+                                        print_first);
+               // after printing the first and not doing them all, no more
+               if (print_first && !print_them_all)
+                       print_first = 0;
+
+               unsaturated = (dsk_counts.saturated == 0);
+               locked = (dsk_counts.unlocked == 0);
+
+               // only do locking retries if unsaturated or rawcard A or B,
+               // otherwise full SAT retry
+               if (unsaturated || (spd_rawcard_aorb && !has_no_sat)) {
+                       if (!locked) {  // and not locked
+                               lock_retries++;
+                               lock_retries_total++;
+                               if (lock_retries <= lock_retries_limit) {
+                                       goto perform_deskew_training;
+                               } else {
+                                       debug("N0.LMC%d: LOCK RETRIES failed after %d retries\n",
+                                             if_num, lock_retries_limit);
+                               }
+                       } else {
+                               // only print if we did try
+                               if (lock_retries_total > 0)
+                                       debug("N0.LMC%d: LOCK RETRIES successful after %d retries\n",
+                                             if_num, lock_retries);
+                       }
+               }               /* if (unsaturated || spd_rawcard_aorb) */
+
+               ++sat_retries;
+
+               /*
+                * At this point, check for a DDR4 RDIMM that will not
+                * benefit from SAT retries; if so, exit
+                */
+               if (spd_rawcard_aorb && !has_no_sat) {
+                       debug("N0.LMC%d: Deskew Training Loop: Exiting for RAWCARD == A or B.\n",
+                             if_num);
+                       break;  // no sat or lock retries
+               }
+
+       } while (!unsaturated && (sat_retries < sat_retries_limit));
+
+       debug("N0.LMC%d: Deskew Training %s. %d sat-retries, %d lock-retries\n",
+             if_num, (sat_retries >= DEFAULT_SAT_RETRY_LIMIT) ?
+             "Timed Out" : "Completed", sat_retries - 1, lock_retries_total);
+
+       // FIXME? add saturation to reasons for fault return - give it a
+       // chance via Internal VREF
+       // FIXME? add OPTIONAL bit value to reasons for fault return -
+       // give it a chance via Internal VREF
+       if (dsk_counts.nibrng_errs != 0 || dsk_counts.nibunl_errs != 0 ||
+           (dsk_counts.bitval_errs != 0 && !disable_bitval_retries) ||
+           !unsaturated) {
+               debug("N0.LMC%d: Nibble or Saturation Error(s) found, returning FAULT\n",
+                     if_num);
+               // FIXME: do we want this output always for errors?
+               validate_deskew_training(priv, rank_mask, if_num,
+                                        &dsk_counts, 1);
+               return -1;      // we did retry locally, they did not help
+       }
+
+       // NOTE: we (currently) always print one last training validation
+       // before starting Read Leveling...
+
+       return 0;
+}
+
+#define SCALING_FACTOR (1000)
+
+// NOTE: this gets called for 1-rank and 2-rank DIMMs in single-slot config
+static int compute_vref_1slot_2rank(int rtt_wr, int rtt_park, int dqx_ctl,
+                                   int rank_count, int dram_connection)
+{
+       u64 reff_s;
+       u64 rser_s = (dram_connection) ? 0 : 15;
+       u64 vdd = 1200;
+       u64 vref;
+       // 99 == HiZ
+       u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ?
+                       1 * 1024 * 1024 : rtt_wr);
+       u64 rtt_park_s = (((rtt_park == 0) || ((rank_count == 1) &&
+                                              (rtt_wr != 0))) ?
+                         1 * 1024 * 1024 : rtt_park);
+       u64 dqx_ctl_s = (dqx_ctl == 0 ? 1 * 1024 * 1024 : dqx_ctl);
+       int vref_value;
+       u64 rangepc = 6000;     // range1 base
+       u64 vrefpc;
+       int vref_range = 0;
+
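+       /*
+        * Sketch of the arithmetic below: reff is RTT_WR in parallel with
+        * RTT_PARK, and vref works out to VDD/2 * (1 + (rser + dqx_ctl) /
+        * (rser + dqx_ctl + reff)), with SCALING_FACTOR avoiding fractions.
+        * vrefpc is that voltage as a percentage of VDD times 100; the DDR4
+        * VrefDQ ranges (range 1 base 60%, range 2 base 45%, 0.65% steps,
+        * bit A6 selecting range 2) then yield the returned value.
+        */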
+       reff_s = divide_nint((rtt_wr_s * rtt_park_s), (rtt_wr_s + rtt_park_s));
+
+       vref = (((rser_s + dqx_ctl_s) * SCALING_FACTOR) /
+               (rser_s + dqx_ctl_s + reff_s)) + SCALING_FACTOR;
+
+       vref = (vref * vdd) / 2 / SCALING_FACTOR;
+
+       vrefpc = (vref * 100 * 100) / vdd;
+
+       if (vrefpc < rangepc) { // < range1 base, use range2
+               vref_range = 1 << 6;    // set bit A6 for range2
+               rangepc = 4500; // range2 base is 45%
+       }
+
+       vref_value = divide_nint(vrefpc - rangepc, 65);
+       if (vref_value < 0)
+               vref_value = vref_range;        // set to base of range
+       else
+               vref_value |= vref_range;
+
+       debug("rtt_wr: %d, rtt_park: %d, dqx_ctl: %d, rank_count: %d\n",
+             rtt_wr, rtt_park, dqx_ctl, rank_count);
+       debug("rtt_wr_s: %lld, rtt_park_s: %lld, dqx_ctl_s: %lld, vref_value: 0x%x, range: %d\n",
+             rtt_wr_s, rtt_park_s, dqx_ctl_s, vref_value ^ vref_range,
+             vref_range ? 2 : 1);
+
+       return vref_value;
+}
+
+// NOTE: this gets called for 1-rank and 2-rank DIMMs in two-slot configs
+static int compute_vref_2slot_2rank(int rtt_wr, int rtt_park_00,
+                                   int rtt_park_01,
+                                   int dqx_ctl, int rtt_nom,
+                                   int dram_connection)
+{
+       u64 rser = (dram_connection) ? 0 : 15;
+       u64 vdd = 1200;
+       u64 vl, vlp, vcm;
+       u64 rd0, rd1, rpullup;
+       // 99 == HiZ
+       u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ?
+                       1 * 1024 * 1024 : rtt_wr);
+       u64 rtt_park_00_s = (rtt_park_00 == 0 ? 1 * 1024 * 1024 : rtt_park_00);
+       u64 rtt_park_01_s = (rtt_park_01 == 0 ? 1 * 1024 * 1024 : rtt_park_01);
+       u64 dqx_ctl_s = (dqx_ctl == 0 ? 1 * 1024 * 1024 : dqx_ctl);
+       u64 rtt_nom_s = (rtt_nom == 0 ? 1 * 1024 * 1024 : rtt_nom);
+       int vref_value;
+       u64 rangepc = 6000;     // range1 base
+       u64 vrefpc;
+       int vref_range = 0;
+
+       // rd0 = (RTT_NOM (parallel) RTT_WR) +  =
+       // ((RTT_NOM * RTT_WR) / (RTT_NOM + RTT_WR)) + RSER
+       rd0 = divide_nint((rtt_nom_s * rtt_wr_s),
+                         (rtt_nom_s + rtt_wr_s)) + rser;
+
+       // rd1 = (RTT_PARK_00 (parallel) RTT_PARK_01) + RSER =
+       // ((RTT_PARK_00 * RTT_PARK_01) / (RTT_PARK_00 + RTT_PARK_01)) + RSER
+       rd1 = divide_nint((rtt_park_00_s * rtt_park_01_s),
+                         (rtt_park_00_s + rtt_park_01_s)) + rser;
+
+       // rpullup = rd0 (parallel) rd1 = (rd0 * rd1) / (rd0 + rd1)
+       rpullup = divide_nint((rd0 * rd1), (rd0 + rd1));
+
+       // vl = (DQX_CTL / (DQX_CTL + rpullup)) * 1.2
+       vl = divide_nint((dqx_ctl_s * vdd), (dqx_ctl_s + rpullup));
+
+       // vlp = ((RSER / rd0) * (1.2 - vl)) + vl
+       vlp = divide_nint((rser * (vdd - vl)), rd0) + vl;
+
+       // vcm = (vlp + 1.2) / 2
+       vcm = divide_nint((vlp + vdd), 2);
+
+       // vrefpc = (vcm / 1.2) * 100
+       vrefpc = divide_nint((vcm * 100 * 100), vdd);
+
+       if (vrefpc < rangepc) { // < range1 base, use range2
+               vref_range = 1 << 6;    // set bit A6 for range2
+               rangepc = 4500; // range2 base is 45%
+       }
+
+       vref_value = divide_nint(vrefpc - rangepc, 65);
+       if (vref_value < 0)
+               vref_value = vref_range;        // set to base of range
+       else
+               vref_value |= vref_range;
+
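+       /*
+        * Purely illustrative numbers (not from any vendor table): with
+        * rtt_wr = 240, rtt_nom = 60, both rtt_park values 60, dqx_ctl = 34
+        * and rser = 15, the steps above give rd0 = 63, rd1 = 45,
+        * rpullup = 26, vl = 680, vlp = 804 and vcm = 1002, i.e. about
+        * 83.5% of VDD, which encodes as range-1 value 36 (0x24).
+        */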
+       debug("rtt_wr:%d, rtt_park_00:%d, rtt_park_01:%d, dqx_ctl:%d, rtt_nom:%d, vref_value:%d (0x%x)\n",
+             rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom, vref_value,
+             vref_value);
+
+       return vref_value;
+}
+
+// NOTE: only call this for DIMMs with 1 or 2 ranks, not 4.
+static int compute_vref_val(struct ddr_priv *priv, int if_num, int rankx,
+                           int dimm_count, int rank_count,
+                           struct impedence_values *imp_values,
+                           int is_stacked_die, int dram_connection)
+{
+       int computed_final_vref_value = 0;
+       int enable_adjust = ENABLE_COMPUTED_VREF_ADJUSTMENT;
+       const char *s;
+       int rtt_wr, dqx_ctl, rtt_nom, index;
+       union cvmx_lmcx_modereg_params1 lmc_modereg_params1;
+       union cvmx_lmcx_modereg_params2 lmc_modereg_params2;
+       union cvmx_lmcx_comp_ctl2 comp_ctl2;
+       int rtt_park;
+       int rtt_park_00;
+       int rtt_park_01;
+
+       debug("N0.LMC%d.R%d: %s(...dram_connection = %d)\n",
+             if_num, rankx, __func__, dram_connection);
+
+       // allow some overrides...
+       s = env_get("ddr_adjust_computed_vref");
+       if (s) {
+               enable_adjust = !!simple_strtoul(s, NULL, 0);
+               if (!enable_adjust) {
+                       debug("N0.LMC%d.R%d: DISABLE adjustment of computed VREF\n",
+                             if_num, rankx);
+               }
+       }
+
+       s = env_get("ddr_set_computed_vref");
+       if (s) {
+               int new_vref = simple_strtoul(s, NULL, 0);
+
+               debug("N0.LMC%d.R%d: OVERRIDE computed VREF to 0x%x (%d)\n",
+                     if_num, rankx, new_vref, new_vref);
+               return new_vref;
+       }
+
+       /*
+        * Calculate an alternative to the measured vref value
+        * but only for configurations we know how to...
+        */
+       // We have code for 2-rank DIMMs in both 1-slot or 2-slot configs,
+       // and can use the 2-rank 1-slot code for 1-rank DIMMs in 1-slot
+       // configs, and can use the 2-rank 2-slot code for 1-rank DIMMs
+       // in 2-slot configs.
+
+       lmc_modereg_params1.u64 =
+           lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
+       lmc_modereg_params2.u64 =
+           lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num));
+       comp_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
+       dqx_ctl = imp_values->dqx_strength[comp_ctl2.s.dqx_ctl];
+
+       // WR always comes from the current rank
+       index = (lmc_modereg_params1.u64 >> (rankx * 12 + 5)) & 0x03;
+       if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
+               index |= lmc_modereg_params1.u64 >> (51 + rankx - 2) & 0x04;
+       rtt_wr = imp_values->rtt_wr_ohms[index];
+
+       // separate calculations for 1 vs 2 DIMMs per LMC
+       if (dimm_count == 1) {
+               // PARK comes from this rank if 1-rank, otherwise other rank
+               index =
+                   (lmc_modereg_params2.u64 >>
+                    ((rankx ^ (rank_count - 1)) * 10 + 0)) & 0x07;
+               rtt_park = imp_values->rtt_nom_ohms[index];
+               computed_final_vref_value =
+                   compute_vref_1slot_2rank(rtt_wr, rtt_park, dqx_ctl,
+                                            rank_count, dram_connection);
+       } else {
+               // get both PARK values from the other DIMM
+               index =
+                   (lmc_modereg_params2.u64 >> ((rankx ^ 0x02) * 10 + 0)) &
+                   0x07;
+               rtt_park_00 = imp_values->rtt_nom_ohms[index];
+               index =
+                   (lmc_modereg_params2.u64 >> ((rankx ^ 0x03) * 10 + 0)) &
+                   0x07;
+               rtt_park_01 = imp_values->rtt_nom_ohms[index];
+               // NOM comes from this rank if 1-rank, otherwise other rank
+               index =
+                   (lmc_modereg_params1.u64 >>
+                    ((rankx ^ (rank_count - 1)) * 12 + 9)) & 0x07;
+               rtt_nom = imp_values->rtt_nom_ohms[index];
+               computed_final_vref_value =
+                   compute_vref_2slot_2rank(rtt_wr, rtt_park_00, rtt_park_01,
+                                            dqx_ctl, rtt_nom, dram_connection);
+       }
+
+       if (enable_adjust) {
+               union cvmx_lmcx_config lmc_config;
+               union cvmx_lmcx_control lmc_control;
+
+               lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+               lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
+
+               /*
+                * New computed vref = existing computed vref - X
+                *
+                * The value of X depends on the configuration. Both #122
+                * and #139 are 2Rx4 RDIMMs, while #124 is a stacked-die
+                * 2Rx4, so the results fall into two cases:
+                *
+                * 1. Stacked Die: 2Rx4
+                * 1-slot: offset = 7, i.e. new computed vref = existing
+                * computed vref - 7
+                * 2-slot: offset = 6
+                *
+                * 2. Regular: 2Rx4
+                * 1-slot: offset = 3
+                * 2-slot: offset = 2
+                */
+               // we know we never get called unless DDR4, so test just
+               // the other conditions
+               if (lmc_control.s.rdimm_ena == 1 &&
+                   rank_count == 2 && lmc_config.s.mode_x4dev) {
+                       // it must first be RDIMM and 2-rank and x4
+                       int adj;
+
+                       // now do according to stacked die or not...
+                       if (is_stacked_die)
+                               adj = (dimm_count == 1) ? -7 : -6;
+                       else
+                               adj = (dimm_count == 1) ? -3 : -2;
+
+                       // we must have adjusted it, so print it out if
+                       // verbosity is right
+                       debug("N0.LMC%d.R%d: adjusting computed vref from %2d (0x%02x) to %2d (0x%02x)\n",
+                             if_num, rankx, computed_final_vref_value,
+                             computed_final_vref_value,
+                             computed_final_vref_value + adj,
+                             computed_final_vref_value + adj);
+                       computed_final_vref_value += adj;
+               }
+       }
+
+       return computed_final_vref_value;
+}
+
+static void unpack_rlevel_settings(int if_bytemask, int ecc_ena,
+                                  struct rlevel_byte_data *rlevel_byte,
+                                  union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank)
+{
+       if ((if_bytemask & 0xff) == 0xff) {
+               if (ecc_ena) {
+                       rlevel_byte[8].delay = lmc_rlevel_rank.s.byte7;
+                       rlevel_byte[7].delay = lmc_rlevel_rank.s.byte6;
+                       rlevel_byte[6].delay = lmc_rlevel_rank.s.byte5;
+                       rlevel_byte[5].delay = lmc_rlevel_rank.s.byte4;
+                       /* ECC */
+                       rlevel_byte[4].delay = lmc_rlevel_rank.s.byte8;
+               } else {
+                       rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7;
+                       rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6;
+                       rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5;
+                       rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4;
+               }
+       } else {
+               rlevel_byte[8].delay = lmc_rlevel_rank.s.byte8; /* unused */
+               rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7; /* unused */
+               rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6; /* unused */
+               rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5; /* unused */
+               rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4; /* ECC */
+       }
+
+       rlevel_byte[3].delay = lmc_rlevel_rank.s.byte3;
+       rlevel_byte[2].delay = lmc_rlevel_rank.s.byte2;
+       rlevel_byte[1].delay = lmc_rlevel_rank.s.byte1;
+       rlevel_byte[0].delay = lmc_rlevel_rank.s.byte0;
+}
+
+static void pack_rlevel_settings(int if_bytemask, int ecc_ena,
+                                struct rlevel_byte_data *rlevel_byte,
+                                union cvmx_lmcx_rlevel_rankx
+                                *final_rlevel_rank)
+{
+       union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank = *final_rlevel_rank;
+
+       if ((if_bytemask & 0xff) == 0xff) {
+               if (ecc_ena) {
+                       lmc_rlevel_rank.s.byte7 = rlevel_byte[8].delay;
+                       lmc_rlevel_rank.s.byte6 = rlevel_byte[7].delay;
+                       lmc_rlevel_rank.s.byte5 = rlevel_byte[6].delay;
+                       lmc_rlevel_rank.s.byte4 = rlevel_byte[5].delay;
+                       /* ECC */
+                       lmc_rlevel_rank.s.byte8 = rlevel_byte[4].delay;
+               } else {
+                       lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay;
+                       lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay;
+                       lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay;
+                       lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay;
+               }
+       } else {
+               lmc_rlevel_rank.s.byte8 = rlevel_byte[8].delay;
+               lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay;
+               lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay;
+               lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay;
+               lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay;
+       }
+
+       lmc_rlevel_rank.s.byte3 = rlevel_byte[3].delay;
+       lmc_rlevel_rank.s.byte2 = rlevel_byte[2].delay;
+       lmc_rlevel_rank.s.byte1 = rlevel_byte[1].delay;
+       lmc_rlevel_rank.s.byte0 = rlevel_byte[0].delay;
+
+       *final_rlevel_rank = lmc_rlevel_rank;
+}
+
+/////////////////// These are the RLEVEL settings display routines
+
+// flags
+#define WITH_NOTHING 0
+#define WITH_SCORE   1
+#define WITH_AVERAGE 2
+#define WITH_FINAL   4
+#define WITH_COMPUTE 8
+
+static void do_display_rl(int if_num,
+                         union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
+                         int rank, int flags, int score)
+{
+       char score_buf[16];
+       char *msg_buf;
+       char hex_buf[20];
+
+       if (flags & WITH_SCORE) {
+               snprintf(score_buf, sizeof(score_buf), "(%d)", score);
+       } else {
+               score_buf[0] = ' ';
+               score_buf[1] = 0;
+       }
+
+       if (flags & WITH_AVERAGE) {
+               msg_buf = "  DELAY AVERAGES  ";
+       } else if (flags & WITH_FINAL) {
+               msg_buf = "  FINAL SETTINGS  ";
+       } else if (flags & WITH_COMPUTE) {
+               msg_buf = "  COMPUTED DELAYS ";
+       } else {
+               snprintf(hex_buf, sizeof(hex_buf), "0x%016llX",
+                        (unsigned long long)lmc_rlevel_rank.u64);
+               msg_buf = hex_buf;
+       }
+
+       debug("N0.LMC%d.R%d: Rlevel Rank %#4x, %s  : %5d %5d %5d %5d %5d %5d %5d %5d %5d %s\n",
+             if_num, rank, lmc_rlevel_rank.s.status, msg_buf,
+             lmc_rlevel_rank.s.byte8, lmc_rlevel_rank.s.byte7,
+             lmc_rlevel_rank.s.byte6, lmc_rlevel_rank.s.byte5,
+             lmc_rlevel_rank.s.byte4, lmc_rlevel_rank.s.byte3,
+             lmc_rlevel_rank.s.byte2, lmc_rlevel_rank.s.byte1,
+             lmc_rlevel_rank.s.byte0, score_buf);
+}
+
+static void display_rl(int if_num,
+                      union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, int rank)
+{
+       do_display_rl(if_num, lmc_rlevel_rank, rank, 0, 0);
+}
+
+static void display_rl_with_score(int if_num,
+                                 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
+                                 int rank, int score)
+{
+       do_display_rl(if_num, lmc_rlevel_rank, rank, 1, score);
+}
+
+static void display_rl_with_final(int if_num,
+                                 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
+                                 int rank)
+{
+       do_display_rl(if_num, lmc_rlevel_rank, rank, 4, 0);
+}
+
+static void display_rl_with_computed(int if_num,
+                                    union cvmx_lmcx_rlevel_rankx
+                                    lmc_rlevel_rank, int rank, int score)
+{
+       do_display_rl(if_num, lmc_rlevel_rank, rank, 9, score);
+}
+
+// flag values
+#define WITH_RODT_BLANK      0
+#define WITH_RODT_SKIPPING   1
+#define WITH_RODT_BESTROW    2
+#define WITH_RODT_BESTSCORE  3
+// control
+#define SKIP_SKIPPING 1
+
+static const char *with_rodt_canned_msgs[4] = {
+       "          ", "SKIPPING  ", "BEST ROW  ", "BEST SCORE"
+};
+
+static void display_rl_with_rodt(int if_num,
+                                union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
+                                int rank, int score,
+                                int nom_ohms, int rodt_ohms, int flag)
+{
+       const char *msg_buf;
+       char set_buf[20];
+
+#if SKIP_SKIPPING
+       if (flag == WITH_RODT_SKIPPING)
+               return;
+#endif
+
+       msg_buf = with_rodt_canned_msgs[flag];
+       if (nom_ohms < 0) {
+               snprintf(set_buf, sizeof(set_buf), "    RODT %3d    ",
+                        rodt_ohms);
+       } else {
+               snprintf(set_buf, sizeof(set_buf), "NOM %3d RODT %3d", nom_ohms,
+                        rodt_ohms);
+       }
+
+       debug("N0.LMC%d.R%d: Rlevel %s   %s  : %5d %5d %5d %5d %5d %5d %5d %5d %5d (%d)\n",
+             if_num, rank, set_buf, msg_buf, lmc_rlevel_rank.s.byte8,
+             lmc_rlevel_rank.s.byte7, lmc_rlevel_rank.s.byte6,
+             lmc_rlevel_rank.s.byte5, lmc_rlevel_rank.s.byte4,
+             lmc_rlevel_rank.s.byte3, lmc_rlevel_rank.s.byte2,
+             lmc_rlevel_rank.s.byte1, lmc_rlevel_rank.s.byte0, score);
+}
+
+static void do_display_wl(int if_num,
+                         union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank,
+                         int rank, int flags)
+{
+       char *msg_buf;
+       char hex_buf[20];
+
+       if (flags & WITH_FINAL) {
+               msg_buf = "  FINAL SETTINGS  ";
+       } else {
+               snprintf(hex_buf, sizeof(hex_buf), "0x%016llX",
+                        (unsigned long long)lmc_wlevel_rank.u64);
+               msg_buf = hex_buf;
+       }
+
+       debug("N0.LMC%d.R%d: Wlevel Rank %#4x, %s  : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
+             if_num, rank, lmc_wlevel_rank.s.status, msg_buf,
+             lmc_wlevel_rank.s.byte8, lmc_wlevel_rank.s.byte7,
+             lmc_wlevel_rank.s.byte6, lmc_wlevel_rank.s.byte5,
+             lmc_wlevel_rank.s.byte4, lmc_wlevel_rank.s.byte3,
+             lmc_wlevel_rank.s.byte2, lmc_wlevel_rank.s.byte1,
+             lmc_wlevel_rank.s.byte0);
+}
+
+static void display_wl(int if_num,
+                      union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank, int rank)
+{
+       do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_NOTHING);
+}
+
+static void display_wl_with_final(int if_num,
+                                 union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank,
+                                 int rank)
+{
+       do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_FINAL);
+}
+
+// pretty-print bitmask adjuster
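+// (shifts the mask right by a nibble while its low byte is zero, e.g.
+// 0x0ff000 prints as 0xff0, so wide bitmasks still fit the %05llx columns)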
+static u64 ppbm(u64 bm)
+{
+       if (bm != 0ul) {
+               while ((bm & 0x0fful) == 0ul)
+                       bm >>= 4;
+       }
+
+       return bm;
+}
+
+// xlate PACKED index to UNPACKED index to use with rlevel_byte
+#define XPU(i, e) (((i) < 4) ? (i) : (((i) < 8) ? (i) + (e) : 4))
+// xlate UNPACKED index to PACKED index to use with rlevel_bitmask
+#define XUP(i, e) (((i) < 4) ? (i) : (e) ? (((i) > 4) ? (i) - 1 : 8) : (i))
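+// Example with ECC enabled: UNPACKED index 4 holds the ECC byte (PACKED
+// index 8) and UNPACKED 5..8 hold data bytes 4..7, so XPU(8, 1) == 4 and
+// XUP(4, 1) == 8, while indices 0..3 map straight through.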
+
+// flag values
+#define WITH_WL_BITMASKS      0
+#define WITH_RL_BITMASKS      1
+#define WITH_RL_MASK_SCORES   2
+#define WITH_RL_SEQ_SCORES    3
+
+static void do_display_bm(int if_num, int rank, void *bm,
+                         int flags, int ecc)
+{
+       if (flags == WITH_WL_BITMASKS) {
+               // wlevel_bitmask array in PACKED index order, so just
+               // print them
+               int *bitmasks = (int *)bm;
+
+               debug("N0.LMC%d.R%d: Wlevel Debug Bitmasks                 : %05x %05x %05x %05x %05x %05x %05x %05x %05x\n",
+                     if_num, rank, bitmasks[8], bitmasks[7], bitmasks[6],
+                     bitmasks[5], bitmasks[4], bitmasks[3], bitmasks[2],
+                     bitmasks[1], bitmasks[0]
+                       );
+       } else if (flags == WITH_RL_BITMASKS) {
+               // rlevel_bitmask array in PACKED index order, so just
+               // print them
+               struct rlevel_bitmask *rlevel_bitmask =
+                       (struct rlevel_bitmask *)bm;
+
+               debug("N0.LMC%d.R%d: Rlevel Debug Bitmasks        8:0      : %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx\n",
+                     if_num, rank, ppbm(rlevel_bitmask[8].bm),
+                     ppbm(rlevel_bitmask[7].bm), ppbm(rlevel_bitmask[6].bm),
+                     ppbm(rlevel_bitmask[5].bm), ppbm(rlevel_bitmask[4].bm),
+                     ppbm(rlevel_bitmask[3].bm), ppbm(rlevel_bitmask[2].bm),
+                     ppbm(rlevel_bitmask[1].bm), ppbm(rlevel_bitmask[0].bm)
+                       );
+       } else if (flags == WITH_RL_MASK_SCORES) {
+               // rlevel_bitmask array in PACKED index order, so just
+               // print them
+               struct rlevel_bitmask *rlevel_bitmask =
+                       (struct rlevel_bitmask *)bm;
+
+               debug("N0.LMC%d.R%d: Rlevel Debug Bitmask Scores  8:0      : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
+                     if_num, rank, rlevel_bitmask[8].errs,
+                     rlevel_bitmask[7].errs, rlevel_bitmask[6].errs,
+                     rlevel_bitmask[5].errs, rlevel_bitmask[4].errs,
+                     rlevel_bitmask[3].errs, rlevel_bitmask[2].errs,
+                     rlevel_bitmask[1].errs, rlevel_bitmask[0].errs);
+       } else if (flags == WITH_RL_SEQ_SCORES) {
+               // rlevel_byte array in UNPACKED index order, so xlate
+               // and print them
+               struct rlevel_byte_data *rlevel_byte =
+                       (struct rlevel_byte_data *)bm;
+
+               debug("N0.LMC%d.R%d: Rlevel Debug Non-seq Scores  8:0      : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
+                     if_num, rank, rlevel_byte[XPU(8, ecc)].sqerrs,
+                     rlevel_byte[XPU(7, ecc)].sqerrs,
+                     rlevel_byte[XPU(6, ecc)].sqerrs,
+                     rlevel_byte[XPU(5, ecc)].sqerrs,
+                     rlevel_byte[XPU(4, ecc)].sqerrs,
+                     rlevel_byte[XPU(3, ecc)].sqerrs,
+                     rlevel_byte[XPU(2, ecc)].sqerrs,
+                     rlevel_byte[XPU(1, ecc)].sqerrs,
+                     rlevel_byte[XPU(0, ecc)].sqerrs);
+       }
+}
+
+static void display_wl_bm(int if_num, int rank, int *bitmasks)
+{
+       do_display_bm(if_num, rank, (void *)bitmasks, WITH_WL_BITMASKS, 0);
+}
+
+static void display_rl_bm(int if_num, int rank,
+                         struct rlevel_bitmask *bitmasks, int ecc_ena)
+{
+       do_display_bm(if_num, rank, (void *)bitmasks, WITH_RL_BITMASKS,
+                     ecc_ena);
+}
+
+static void display_rl_bm_scores(int if_num, int rank,
+                                struct rlevel_bitmask *bitmasks, int ecc_ena)
+{
+       do_display_bm(if_num, rank, (void *)bitmasks, WITH_RL_MASK_SCORES,
+                     ecc_ena);
+}
+
+static void display_rl_seq_scores(int if_num, int rank,
+                                 struct rlevel_byte_data *bytes, int ecc_ena)
+{
+       do_display_bm(if_num, rank, (void *)bytes, WITH_RL_SEQ_SCORES, ecc_ena);
+}
+
+#define RODT_OHMS_COUNT        8
+#define RTT_NOM_OHMS_COUNT     8
+#define RTT_NOM_TABLE_COUNT    8
+#define RTT_WR_OHMS_COUNT      8
+#define DIC_OHMS_COUNT         3
+#define DRIVE_STRENGTH_COUNT  15
+
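+/*
+ * These tables translate mode-register/CSR field encodings into ohms.
+ * The ddr4_rtt_nom_ohms entries follow the DDR4 MR1 RTT_NOM encoding with
+ * RZQ = 240 ohms: 0 = disabled, 1 = RZQ/4, 2 = RZQ/2, 3 = RZQ/6, 4 = RZQ/1,
+ * 5 = RZQ/5, 6 = RZQ/3, 7 = RZQ/7 (34 ohms, rounded).
+ */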
+static unsigned char ddr4_rodt_ohms[RODT_OHMS_COUNT] = {
+       0, 40, 60, 80, 120, 240, 34, 48 };
+static unsigned char ddr4_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = {
+       0, 60, 120, 40, 240, 48, 80, 34 };
+static unsigned char ddr4_rtt_nom_table[RTT_NOM_TABLE_COUNT] = {
+       0, 4, 2, 6, 1, 5, 3, 7 };
+// setting HiZ ohms to 99 for computed vref
+static unsigned char ddr4_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = {
+       0, 120, 240, 99, 80 };
+static unsigned char ddr4_dic_ohms[DIC_OHMS_COUNT] = { 34, 48 };
+static short ddr4_drive_strength[DRIVE_STRENGTH_COUNT] = {
+       0, 0, 26, 30, 34, 40, 48, 68, 0, 0, 0, 0, 0, 0, 0 };
+static short ddr4_dqx_strength[DRIVE_STRENGTH_COUNT] = {
+       0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 };
+struct impedence_values ddr4_impedence_val = {
+       .rodt_ohms = ddr4_rodt_ohms,
+       .rtt_nom_ohms = ddr4_rtt_nom_ohms,
+       .rtt_nom_table = ddr4_rtt_nom_table,
+       .rtt_wr_ohms = ddr4_rtt_wr_ohms,
+       .dic_ohms = ddr4_dic_ohms,
+       .drive_strength = ddr4_drive_strength,
+       .dqx_strength = ddr4_dqx_strength,
+};
+
+static unsigned char ddr3_rodt_ohms[RODT_OHMS_COUNT] = {
+       0, 20, 30, 40, 60, 120, 0, 0 };
+static unsigned char ddr3_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = {
+       0, 60, 120, 40, 20, 30, 0, 0 };
+static unsigned char ddr3_rtt_nom_table[RTT_NOM_TABLE_COUNT] = {
+       0, 2, 1, 3, 5, 4, 0, 0 };
+static unsigned char ddr3_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = { 0, 60, 120 };
+static unsigned char ddr3_dic_ohms[DIC_OHMS_COUNT] = { 40, 34 };
+static short ddr3_drive_strength[DRIVE_STRENGTH_COUNT] = {
+       0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 };
+static struct impedence_values ddr3_impedence_val = {
+       .rodt_ohms = ddr3_rodt_ohms,
+       .rtt_nom_ohms = ddr3_rtt_nom_ohms,
+       .rtt_nom_table = ddr3_rtt_nom_table,
+       .rtt_wr_ohms = ddr3_rtt_wr_ohms,
+       .dic_ohms = ddr3_dic_ohms,
+       .drive_strength = ddr3_drive_strength,
+       .dqx_strength = ddr3_drive_strength,
+};
+
+static u64 hertz_to_psecs(u64 hertz)
+{
+       /* Clock in psecs */
+       return divide_nint((u64)1000 * 1000 * 1000 * 1000, hertz);
+}
+
+#define DIVIDEND_SCALE 1000    /* Scale to avoid rounding error. */
+
+static u64 psecs_to_mts(u64 psecs)
+{
+       return divide_nint(divide_nint((u64)(2 * 1000000 * DIVIDEND_SCALE),
+                                      psecs), DIVIDEND_SCALE);
+}
+
+#define WITHIN(v, b, m) (((v) >= ((b) - (m))) && ((v) <= ((b) + (m))))
+
+static unsigned long pretty_psecs_to_mts(u64 psecs)
+{
+       u64 ret = 0;            // default to error
+
+       if (WITHIN(psecs, 2500, 1))
+               ret = 800;
+       else if (WITHIN(psecs, 1875, 1))
+               ret = 1066;
+       else if (WITHIN(psecs, 1500, 1))
+               ret = 1333;
+       else if (WITHIN(psecs, 1250, 1))
+               ret = 1600;
+       else if (WITHIN(psecs, 1071, 1))
+               ret = 1866;
+       else if (WITHIN(psecs, 937, 1))
+               ret = 2133;
+       else if (WITHIN(psecs, 833, 1))
+               ret = 2400;
+       else if (WITHIN(psecs, 750, 1))
+               ret = 2666;
+       return ret;
+}
+
+static u64 mts_to_hertz(u64 mts)
+{
+       return ((mts * 1000 * 1000) / 2);
+}
+
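+/*
+ * Compute the RC3x encoding (the fine-granularity operating-speed control
+ * word of a DDR4 RDIMM's registering clock driver) from the clock period.
+ * The loop below places the data rate into 20 MT/s bins above a 1240 MT/s
+ * base; tracing it, 833 ps (2400 MT/s) lands in the bin giving rc3x = 57.
+ */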
+static int compute_rc3x(int64_t tclk_psecs)
+{
+       long speed;
+       long tclk_psecs_min, tclk_psecs_max;
+       long data_rate_mhz, data_rate_mhz_min, data_rate_mhz_max;
+       int rc3x;
+
+#define ENCODING_BASE 1240
+
+       data_rate_mhz = psecs_to_mts(tclk_psecs);
+
+       /*
+        * 2400 MT/s is a special case. Using integer arithmetic it rounds
+        * from 833 psecs to 2401 MT/s. Force it to 2400 to pick the
+        * proper setting from the table.
+        */
+       if (tclk_psecs == 833)
+               data_rate_mhz = 2400;
+
+       for (speed = ENCODING_BASE; speed < 3200; speed += 20) {
+               int error = 0;
+
+               /* Clock in psecs */
+               tclk_psecs_min = hertz_to_psecs(mts_to_hertz(speed + 00));
+               /* Clock in psecs */
+               tclk_psecs_max = hertz_to_psecs(mts_to_hertz(speed + 18));
+
+               data_rate_mhz_min = psecs_to_mts(tclk_psecs_min);
+               data_rate_mhz_max = psecs_to_mts(tclk_psecs_max);
+
+               /* Force alignment to a multiple of 20 to avoid rounding errors. */
+               data_rate_mhz_min = ((data_rate_mhz_min + 18) / 20) * 20;
+               data_rate_mhz_max = ((data_rate_mhz_max + 18) / 20) * 20;
+
+               error += (speed + 00 != data_rate_mhz_min);
+               error += (speed + 20 != data_rate_mhz_max);
+
+               rc3x = (speed - ENCODING_BASE) / 20;
+
+               if (data_rate_mhz <= (speed + 20))
+                       break;
+       }
+
+       return rc3x;
+}
+
+/*
+ * Static global variables are needed so that the loops of the original huge
+ * functions can be split out into separate functions. It's not elegant, but
+ * it is the only way to break functions like init_octeon3_ddr3_interface()
+ * into smaller logical functions with fewer indentation levels.
+ */
+static int if_num __section(".data");
+static u32 if_mask __section(".data");
+static int ddr_hertz __section(".data");
+
+static struct ddr_conf *ddr_conf __section(".data");
+static const struct dimm_odt_config *odt_1rank_config __section(".data");
+static const struct dimm_odt_config *odt_2rank_config __section(".data");
+static const struct dimm_odt_config *odt_4rank_config __section(".data");
+static struct dimm_config *dimm_config_table __section(".data");
+static const struct dimm_odt_config *odt_config __section(".data");
+static const struct ddr3_custom_config *c_cfg __section(".data");
+
+static int odt_idx __section(".data");
+
+static ulong tclk_psecs __section(".data");
+static ulong eclk_psecs __section(".data");
+
+static int row_bits __section(".data");
+static int col_bits __section(".data");
+static int num_banks __section(".data");
+static int num_ranks __section(".data");
+static int dram_width __section(".data");
+static int dimm_count __section(".data");
+/* Accumulate and report all the errors before giving up */
+static int fatal_error __section(".data");
+/* Flag that indicates safe DDR settings should be used */
+static int safe_ddr_flag __section(".data");
+/* Octeon II Default: 64bit interface width */
+static int if_64b __section(".data");
+static int if_bytemask __section(".data");
+static u32 mem_size_mbytes __section(".data");
+static unsigned int didx __section(".data");
+static int bank_bits __section(".data");
+static int bunk_enable __section(".data");
+static int rank_mask __section(".data");
+static int column_bits_start __section(".data");
+static int row_lsb __section(".data");
+static int pbank_lsb __section(".data");
+static int use_ecc __section(".data");
+static int mtb_psec __section(".data");
+static short ftb_dividend __section(".data");
+static short ftb_divisor __section(".data");
+static int taamin __section(".data");
+static int tckmin __section(".data");
+static int cl __section(".data");
+static int min_cas_latency __section(".data");
+static int max_cas_latency __section(".data");
+static int override_cas_latency __section(".data");
+static int ddr_rtt_nom_auto __section(".data");
+static int ddr_rodt_ctl_auto __section(".data");
+
+static int spd_addr __section(".data");
+static int spd_org __section(".data");
+static int spd_banks __section(".data");
+static int spd_rdimm __section(".data");
+static int spd_dimm_type __section(".data");
+static int spd_ecc __section(".data");
+static u32 spd_cas_latency __section(".data");
+static int spd_mtb_dividend __section(".data");
+static int spd_mtb_divisor __section(".data");
+static int spd_tck_min __section(".data");
+static int spd_taa_min __section(".data");
+static int spd_twr __section(".data");
+static int spd_trcd __section(".data");
+static int spd_trrd __section(".data");
+static int spd_trp __section(".data");
+static int spd_tras __section(".data");
+static int spd_trc __section(".data");
+static int spd_trfc __section(".data");
+static int spd_twtr __section(".data");
+static int spd_trtp __section(".data");
+static int spd_tfaw __section(".data");
+static int spd_addr_mirror __section(".data");
+static int spd_package __section(".data");
+static int spd_rawcard __section(".data");
+static int spd_rawcard_aorb __section(".data");
+static int spd_rdimm_registers __section(".data");
+static int spd_thermal_sensor __section(".data");
+
+static int is_stacked_die __section(".data");
+static int is_3ds_dimm __section(".data");
+// 3DS: logical ranks per package rank
+static int lranks_per_prank __section(".data");
+// 3DS: number of logical-rank bits
+static int lranks_bits __section(".data");
+// in Mbits; only used for 3DS
+static int die_capacity __section(".data");
+
+static enum ddr_type ddr_type __section(".data");
+
+static int twr __section(".data");
+static int trcd __section(".data");
+static int trrd __section(".data");
+static int trp __section(".data");
+static int tras __section(".data");
+static int trc __section(".data");
+static int trfc __section(".data");
+static int twtr __section(".data");
+static int trtp __section(".data");
+static int tfaw __section(".data");
+
+static int ddr4_tckavgmin __section(".data");
+static int ddr4_tckavgmax __section(".data");
+static int ddr4_trdcmin __section(".data");
+static int ddr4_trpmin __section(".data");
+static int ddr4_trasmin __section(".data");
+static int ddr4_trcmin __section(".data");
+static int ddr4_trfc1min __section(".data");
+static int ddr4_trfc2min __section(".data");
+static int ddr4_trfc4min __section(".data");
+static int ddr4_tfawmin __section(".data");
+static int ddr4_trrd_smin __section(".data");
+static int ddr4_trrd_lmin __section(".data");
+static int ddr4_tccd_lmin __section(".data");
+
+static int wl_mask_err __section(".data");
+static int wl_loops __section(".data");
+static int default_rtt_nom[4] __section(".data");
+static int dyn_rtt_nom_mask __section(".data");
+static struct impedence_values *imp_val __section(".data");
+static char default_rodt_ctl __section(".data");
+// default to disabled (i.e., try LMC restart, not chip reset)
+static int ddr_disable_chip_reset __section(".data");
+static const char *dimm_type_name __section(".data");
+static int match_wl_rtt_nom __section(".data");
+
+struct hwl_alt_by_rank {
+       u16 hwl_alt_mask;       // mask of bytelanes with an alternate delay
+       u16 hwl_alt_delay[9];   // per-bytelane alternate delay, valid if mask bit = 1
+};
+
+static struct hwl_alt_by_rank hwl_alts[4] __section(".data");
+
+#define DEFAULT_INTERNAL_VREF_TRAINING_LIMIT 3 // was: 5
+static int internal_retries __section(".data");
+
+static int deskew_training_errors __section(".data");
+static struct deskew_counts deskew_training_results __section(".data");
+static int disable_deskew_training __section(".data");
+static int restart_if_dsk_incomplete __section(".data");
+static int dac_eval_retries __section(".data");
+static int dac_settings[9] __section(".data");
+static int num_samples __section(".data");
+static int sample __section(".data");
+static int lane __section(".data");
+static int last_lane __section(".data");
+static int total_dac_eval_retries __section(".data");
+static int dac_eval_exhausted __section(".data");
+
+#define DEFAULT_DAC_SAMPLES 7  // originally was 5
+#define DAC_RETRIES_LIMIT   2
+
+struct bytelane_sample {
+       s16 bytes[DEFAULT_DAC_SAMPLES];
+};
+
+static struct bytelane_sample lanes[9] __section(".data");
+
+static char disable_sequential_delay_check __section(".data");
+static int wl_print __section(".data");
+
+static int enable_by_rank_init __section(".data");
+static int saved_rank_mask __section(".data");
+static int by_rank __section(".data");
+static struct deskew_data rank_dsk[4] __section(".data");
+static struct dac_data rank_dac[4] __section(".data");
+
+// TODO: perhaps remove node completely at some point?
+static int node __section(".data");
+static int base_cl __section(".data");
+
+/* Parameters from DDR3 Specifications */
+#define DDR3_TREFI         7800000     /* 7.8 us */
+#define DDR3_ZQCS          80000ull    /* 80 ns */
+#define DDR3_ZQCS_INTERNAL 1280000000ull       /* 128ms/100 */
+#define DDR3_TCKE          5000        /* 5 ns */
+#define DDR3_TMRD          4   /* 4 nCK */
+#define DDR3_TDLLK         512 /* 512 nCK */
+#define DDR3_TMPRR         1   /* 1 nCK */
+#define DDR3_TWLMRD        40  /* 40 nCK */
+#define DDR3_TWLDQSEN      25  /* 25 nCK */
+
+/* Parameters from DDR4 Specifications */
+#define DDR4_TMRD          8   /* 8 nCK */
+#define DDR4_TDLLK         768 /* 768 nCK */
+
+static void lmc_config(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_config cfg;
+       char *s;
+
+       cfg.u64 = 0;
+
+       cfg.cn78xx.ecc_ena = use_ecc;
+       cfg.cn78xx.row_lsb = encode_row_lsb_ddr3(row_lsb);
+       cfg.cn78xx.pbank_lsb = encode_pbank_lsb_ddr3(pbank_lsb);
+
+       cfg.cn78xx.idlepower = 0;       /* Disabled */
+
+       s = lookup_env(priv, "ddr_idlepower");
+       if (s)
+               cfg.cn78xx.idlepower = simple_strtoul(s, NULL, 0);
+
+       cfg.cn78xx.forcewrite = 0;      /* Disabled */
+       /* Include memory reference address in the ECC */
+       cfg.cn78xx.ecc_adr = 1;
+
+       s = lookup_env(priv, "ddr_ecc_adr");
+       if (s)
+               cfg.cn78xx.ecc_adr = simple_strtoul(s, NULL, 0);
+
+       cfg.cn78xx.reset = 0;
+
+       /*
+        * Program LMC0_CONFIG[24:18], ref_zqcs_int(6:0) to
+        * RND-DN(tREFI/clkPeriod/512) Program LMC0_CONFIG[36:25],
+        * ref_zqcs_int(18:7) to
+        * RND-DN(ZQCS_Interval/clkPeriod/(512*128)). Note that this
+        * value should always be greater than 32, to account for
+        * resistor calibration delays.
+        */
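+       /*
+        * Worked example (illustrative): with tclk_psecs = 1250 (DDR3-1600),
+        * RND-DN(tREFI / clkPeriod / 512) = 7800000 / 1250 / 512 = 12 for
+        * bits [6:0]. The [18:7] value below is forced to at least 33 by the
+        * max() so that it stays greater than 32, as required above.
+        */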
+
+       cfg.cn78xx.ref_zqcs_int = ((DDR3_TREFI / tclk_psecs / 512) & 0x7f);
+       cfg.cn78xx.ref_zqcs_int |=
+               ((max(33ull, (DDR3_ZQCS_INTERNAL / (tclk_psecs / 100) /
+                             (512 * 128))) & 0xfff) << 7);
+
+       cfg.cn78xx.early_dqx = 1;       /* Default to enabled */
+
+       s = lookup_env(priv, "ddr_early_dqx");
+       if (!s)
+               s = lookup_env(priv, "ddr%d_early_dqx", if_num);
+
+       if (s)
+               cfg.cn78xx.early_dqx = simple_strtoul(s, NULL, 0);
+
+       cfg.cn78xx.sref_with_dll = 0;
+
+       cfg.cn78xx.rank_ena = bunk_enable;
+       cfg.cn78xx.rankmask = rank_mask;        /* Set later */
+       cfg.cn78xx.mirrmask = (spd_addr_mirror << 1 | spd_addr_mirror << 3) &
+               rank_mask;
+       /* Set once and don't change it. */
+       cfg.cn78xx.init_status = rank_mask;
+       cfg.cn78xx.early_unload_d0_r0 = 0;
+       cfg.cn78xx.early_unload_d0_r1 = 0;
+       cfg.cn78xx.early_unload_d1_r0 = 0;
+       cfg.cn78xx.early_unload_d1_r1 = 0;
+       cfg.cn78xx.scrz = 0;
+       if (octeon_is_cpuid(OCTEON_CN70XX))
+               cfg.cn78xx.mode32b = 1; /* Read-only. Always 1. */
+       cfg.cn78xx.mode_x4dev = (dram_width == 4) ? 1 : 0;
+       cfg.cn78xx.bg2_enable = ((ddr_type == DDR4_DRAM) &&
+                                (dram_width == 16)) ? 0 : 1;
+
+       s = lookup_env_ull(priv, "ddr_config");
+       if (s)
+               cfg.u64 = simple_strtoull(s, NULL, 0);
+       debug("LMC_CONFIG                                    : 0x%016llx\n",
+             cfg.u64);
+       lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
+}
+
+static void lmc_control(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_control ctrl;
+       char *s;
+
+       ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
+       ctrl.s.rdimm_ena = spd_rdimm;
+       ctrl.s.bwcnt = 0;       /* Clear counter later */
+       if (spd_rdimm)
+               ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_rdimm);
+       else
+               ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_udimm);
+       ctrl.s.pocas = 0;
+       ctrl.s.fprch2 = (safe_ddr_flag ? 2 : c_cfg->fprch2);
+       ctrl.s.throttle_rd = safe_ddr_flag ? 1 : 0;
+       ctrl.s.throttle_wr = safe_ddr_flag ? 1 : 0;
+       ctrl.s.inorder_rd = safe_ddr_flag ? 1 : 0;
+       ctrl.s.inorder_wr = safe_ddr_flag ? 1 : 0;
+       ctrl.s.elev_prio_dis = safe_ddr_flag ? 1 : 0;
+       /* discards writes to addresses that don't exist in the DRAM */
+       ctrl.s.nxm_write_en = 0;
+       ctrl.s.max_write_batch = 8;
+       ctrl.s.xor_bank = 1;
+       ctrl.s.auto_dclkdis = 1;
+       ctrl.s.int_zqcs_dis = 0;
+       ctrl.s.ext_zqcs_dis = 0;
+       ctrl.s.bprch = 1;
+       ctrl.s.wodt_bprch = 1;
+       ctrl.s.rodt_bprch = 1;
+
+       s = lookup_env(priv, "ddr_xor_bank");
+       if (s)
+               ctrl.s.xor_bank = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_2t");
+       if (s)
+               ctrl.s.ddr2t = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_fprch2");
+       if (s)
+               ctrl.s.fprch2 = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_bprch");
+       if (s)
+               ctrl.s.bprch = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_wodt_bprch");
+       if (s)
+               ctrl.s.wodt_bprch = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_rodt_bprch");
+       if (s)
+               ctrl.s.rodt_bprch = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_int_zqcs_dis");
+       if (s)
+               ctrl.s.int_zqcs_dis = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_ext_zqcs_dis");
+       if (s)
+               ctrl.s.ext_zqcs_dis = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env_ull(priv, "ddr_control");
+       if (s)
+               ctrl.u64 = simple_strtoull(s, NULL, 0);
+
+       debug("LMC_CONTROL                                   : 0x%016llx\n",
+             ctrl.u64);
+       lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
+}
+
+static void lmc_timing_params0(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_timing_params0 tp0;
+       unsigned int trp_value;
+       char *s;
+
+       tp0.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS0(if_num));
+
+       trp_value = divide_roundup(trp, tclk_psecs) - 1;
+       debug("TIMING_PARAMS0[TRP]: NEW 0x%x, OLD 0x%x\n", trp_value,
+             trp_value +
+             (unsigned int)(divide_roundup(max(4ull * tclk_psecs, 7500ull),
+                                           tclk_psecs)) - 4);
+       s = lookup_env_ull(priv, "ddr_use_old_trp");
+       if (s) {
+               if (!!simple_strtoull(s, NULL, 0)) {
+                       trp_value +=
+                           divide_roundup(max(4ull * tclk_psecs, 7500ull),
+                                          tclk_psecs) - 4;
+                       debug("TIMING_PARAMS0[trp]: USING OLD 0x%x\n",
+                             trp_value);
+               }
+       }
+
+       tp0.cn78xx.txpr =
+           divide_roundup(max(5ull * tclk_psecs, trfc + 10000ull),
+                          16 * tclk_psecs);
+       tp0.cn78xx.trp = trp_value & 0x1f;
+       tp0.cn78xx.tcksre =
+           divide_roundup(max(5ull * tclk_psecs, 10000ull), tclk_psecs) - 1;
+
+       if (ddr_type == DDR4_DRAM) {
+               int tzqinit = 4;        // Default to 4, for all DDR4 speed bins
+
+               s = lookup_env(priv, "ddr_tzqinit");
+               if (s)
+                       tzqinit = simple_strtoul(s, NULL, 0);
+
+               tp0.cn78xx.tzqinit = tzqinit;
+               /* Always 8. */
+               tp0.cn78xx.tzqcs = divide_roundup(128 * tclk_psecs,
+                                                 (16 * tclk_psecs));
+               tp0.cn78xx.tcke =
+                   divide_roundup(max(3 * tclk_psecs, (ulong)DDR3_TCKE),
+                                  tclk_psecs) - 1;
+               tp0.cn78xx.tmrd =
+                   divide_roundup((DDR4_TMRD * tclk_psecs), tclk_psecs) - 1;
+               tp0.cn78xx.tmod = 25;   /* 25 is the max allowed */
+               tp0.cn78xx.tdllk = divide_roundup(DDR4_TDLLK, 256);
+       } else {
+               tp0.cn78xx.tzqinit =
+                   divide_roundup(max(512ull * tclk_psecs, 640000ull),
+                                  (256 * tclk_psecs));
+               tp0.cn78xx.tzqcs =
+                   divide_roundup(max(64ull * tclk_psecs, DDR3_ZQCS),
+                                  (16 * tclk_psecs));
+               tp0.cn78xx.tcke = divide_roundup(DDR3_TCKE, tclk_psecs) - 1;
+               tp0.cn78xx.tmrd =
+                   divide_roundup((DDR3_TMRD * tclk_psecs), tclk_psecs) - 1;
+               tp0.cn78xx.tmod =
+                   divide_roundup(max(12ull * tclk_psecs, 15000ull),
+                                  tclk_psecs) - 1;
+               tp0.cn78xx.tdllk = divide_roundup(DDR3_TDLLK, 256);
+       }
+
+       s = lookup_env_ull(priv, "ddr_timing_params0");
+       if (s)
+               tp0.u64 = simple_strtoull(s, NULL, 0);
+       debug("TIMING_PARAMS0                                : 0x%016llx\n",
+             tp0.u64);
+       lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS0(if_num), tp0.u64);
+}
+
+static void lmc_timing_params1(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_timing_params1 tp1;
+       unsigned int txp, temp_trcd, trfc_dlr;
+       char *s;
+
+       tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));
+
+       /* .cn70xx. */
+       tp1.s.tmprr = divide_roundup(DDR3_TMPRR * tclk_psecs, tclk_psecs) - 1;
+
+       tp1.cn78xx.tras = divide_roundup(tras, tclk_psecs) - 1;
+
+       temp_trcd = divide_roundup(trcd, tclk_psecs);
+       if (temp_trcd > 15) {
+               debug("TIMING_PARAMS1[trcd]: need extension bit for 0x%x\n",
+                     temp_trcd);
+       }
+       if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trcd > 15) {
+               /*
+                * Let .trcd=0 serve as a flag that the field has
+                * overflowed. Must use Additive Latency mode as a
+                * workaround.
+                */
+               temp_trcd = 0;
+       }
+       tp1.cn78xx.trcd = (temp_trcd >> 0) & 0xf;
+       tp1.cn78xx.trcd_ext = (temp_trcd >> 4) & 0x1;
+
+       tp1.cn78xx.twtr = divide_roundup(twtr, tclk_psecs) - 1;
+       tp1.cn78xx.trfc = divide_roundup(trfc, 8 * tclk_psecs);
+
+       if (ddr_type == DDR4_DRAM) {
+               /* Workaround bug 24006. Use Trrd_l. */
+               tp1.cn78xx.trrd =
+                   divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2;
+       } else {
+               tp1.cn78xx.trrd = divide_roundup(trrd, tclk_psecs) - 2;
+       }
+
+       /*
+        * tXP = max( 3nCK, 7.5 ns)     DDR3-800   tCLK = 2500 psec
+        * tXP = max( 3nCK, 7.5 ns)     DDR3-1066  tCLK = 1875 psec
+        * tXP = max( 3nCK, 6.0 ns)     DDR3-1333  tCLK = 1500 psec
+        * tXP = max( 3nCK, 6.0 ns)     DDR3-1600  tCLK = 1250 psec
+        * tXP = max( 3nCK, 6.0 ns)     DDR3-1866  tCLK = 1071 psec
+        * tXP = max( 3nCK, 6.0 ns)     DDR3-2133  tCLK =  937 psec
+        */
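+       /*
+        * Example (illustrative): at tclk_psecs = 1250 (DDR3-1600), tXP is
+        * max(3 * 1250, 6000) = 6000 ps, so the field below is programmed to
+        * roundup(6000 / 1250) - 1 = 4.
+        */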
+       txp = (tclk_psecs < 1875) ? 6000 : 7500;
+       txp = divide_roundup(max((unsigned int)(3 * tclk_psecs), txp),
+                            tclk_psecs) - 1;
+       if (txp > 7) {
+               debug("TIMING_PARAMS1[txp]: need extension bit for 0x%x\n",
+                     txp);
+       }
+       if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && txp > 7)
+               txp = 7;        // max it out
+       tp1.cn78xx.txp = (txp >> 0) & 7;
+       tp1.cn78xx.txp_ext = (txp >> 3) & 1;
+
+       tp1.cn78xx.twlmrd = divide_roundup(DDR3_TWLMRD * tclk_psecs,
+                                          4 * tclk_psecs);
+       tp1.cn78xx.twldqsen = divide_roundup(DDR3_TWLDQSEN * tclk_psecs,
+                                            4 * tclk_psecs);
+       tp1.cn78xx.tfaw = divide_roundup(tfaw, 4 * tclk_psecs);
+       tp1.cn78xx.txpdll = divide_roundup(max(10ull * tclk_psecs, 24000ull),
+                                          tclk_psecs) - 1;
+
+       if (ddr_type == DDR4_DRAM && is_3ds_dimm) {
+               /*
+                * 4 Gb: tRFC_DLR = 90 ns
+                * 8 Gb: tRFC_DLR = 120 ns
+                * 16 Gb: tRFC_DLR = 190 ns FIXME?
+                */
+               if (die_capacity == 0x1000)     // 4 Gbit
+                       trfc_dlr = 90;
+               else if (die_capacity == 0x2000)        // 8 Gbit
+                       trfc_dlr = 120;
+               else if (die_capacity == 0x4000)        // 16 Gbit
+                       trfc_dlr = 190;
+               else
+                       trfc_dlr = 0;
+
+               if (trfc_dlr == 0) {
+                       debug("N%d.LMC%d: ERROR: tRFC_DLR: die_capacity %u Mbit is illegal\n",
+                             node, if_num, die_capacity);
+               } else {
+                       tp1.cn78xx.trfc_dlr =
+                           divide_roundup(trfc_dlr * 1000UL, 8 * tclk_psecs);
+                       debug("N%d.LMC%d: TIMING_PARAMS1[trfc_dlr] set to %u\n",
+                             node, if_num, tp1.cn78xx.trfc_dlr);
+               }
+       }
+
+       s = lookup_env_ull(priv, "ddr_timing_params1");
+       if (s)
+               tp1.u64 = simple_strtoull(s, NULL, 0);
+
+       debug("TIMING_PARAMS1                                : 0x%016llx\n",
+             tp1.u64);
+       lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);
+}
+
+static void lmc_timing_params2(struct ddr_priv *priv)
+{
+       if (ddr_type == DDR4_DRAM) {
+               union cvmx_lmcx_timing_params1 tp1;
+               union cvmx_lmcx_timing_params2 tp2;
+               int temp_trrd_l;
+
+               tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));
+               tp2.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS2(if_num));
+               debug("TIMING_PARAMS2                                : 0x%016llx\n",
+                     tp2.u64);
+
+               temp_trrd_l = divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2;
+               if (temp_trrd_l > 7)
+                       debug("TIMING_PARAMS2[trrd_l]: need extension bit for 0x%x\n",
+                             temp_trrd_l);
+               if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trrd_l > 7)
+                       temp_trrd_l = 7;        // max it out
+               tp2.cn78xx.trrd_l = (temp_trrd_l >> 0) & 7;
+               tp2.cn78xx.trrd_l_ext = (temp_trrd_l >> 3) & 1;
+
+               // correct for DDR4-1600 through DDR4-2400
+               tp2.s.twtr_l = divide_nint(max(4ull * tclk_psecs, 7500ull),
+                                          tclk_psecs) - 1;
+               tp2.s.t_rw_op_max = 7;
+               tp2.s.trtp = divide_roundup(max(4ull * tclk_psecs, 7500ull),
+                                           tclk_psecs) - 1;
+
+               debug("TIMING_PARAMS2                                : 0x%016llx\n",
+                     tp2.u64);
+               lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS2(if_num), tp2.u64);
+
+               /*
+                * Workaround Errata 25823 - LMC: Possible DDR4 tWTR_L not met
+                * for Write-to-Read operations to the same Bank Group
+                */
+               if (tp1.cn78xx.twtr < (tp2.s.twtr_l - 4)) {
+                       tp1.cn78xx.twtr = tp2.s.twtr_l - 4;
+                       debug("ERRATA 25823: NEW: TWTR: %d, TWTR_L: %d\n",
+                             tp1.cn78xx.twtr, tp2.s.twtr_l);
+                       debug("TIMING_PARAMS1                                : 0x%016llx\n",
+                             tp1.u64);
+                       lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);
+               }
+       }
+}
+
+static void lmc_modereg_params0(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_modereg_params0 mp0;
+       int param;
+       char *s;
+
+       mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
+
+       if (ddr_type == DDR4_DRAM) {
+               mp0.s.cwl = 0;  /* 1600 (1250ps) */
+               if (tclk_psecs < 1250)
+                       mp0.s.cwl = 1;  /* 1866 (1072ps) */
+               if (tclk_psecs < 1072)
+                       mp0.s.cwl = 2;  /* 2133 (938ps) */
+               if (tclk_psecs < 938)
+                       mp0.s.cwl = 3;  /* 2400 (833ps) */
+               if (tclk_psecs < 833)
+                       mp0.s.cwl = 4;  /* 2666 (750ps) */
+               if (tclk_psecs < 750)
+                       mp0.s.cwl = 5;  /* 3200 (625ps) */
+       } else {
+               /*
+                ** CSR   CWL         CAS write Latency
+                ** ===   ===   =================================
+                **  0      5   (           tCK(avg) >=   2.5 ns)
+                **  1      6   (2.5 ns   > tCK(avg) >= 1.875 ns)
+                **  2      7   (1.875 ns > tCK(avg) >= 1.5   ns)
+                **  3      8   (1.5 ns   > tCK(avg) >= 1.25  ns)
+                **  4      9   (1.25 ns  > tCK(avg) >= 1.07  ns)
+                **  5     10   (1.07 ns  > tCK(avg) >= 0.935 ns)
+                **  6     11   (0.935 ns > tCK(avg) >= 0.833 ns)
+                **  7     12   (0.833 ns > tCK(avg) >= 0.75  ns)
+                */
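+               /*
+                * Example (illustrative): tCK(avg) = 1.25 ns (DDR3-1600) falls
+                * in the "1.5 ns > tCK(avg) >= 1.25 ns" row, so the code below
+                * ends up with CSR value 3, i.e. CWL = 8.
+                */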
+
+               mp0.s.cwl = 0;
+               if (tclk_psecs < 2500)
+                       mp0.s.cwl = 1;
+               if (tclk_psecs < 1875)
+                       mp0.s.cwl = 2;
+               if (tclk_psecs < 1500)
+                       mp0.s.cwl = 3;
+               if (tclk_psecs < 1250)
+                       mp0.s.cwl = 4;
+               if (tclk_psecs < 1070)
+                       mp0.s.cwl = 5;
+               if (tclk_psecs < 935)
+                       mp0.s.cwl = 6;
+               if (tclk_psecs < 833)
+                       mp0.s.cwl = 7;
+       }
+
+       s = lookup_env(priv, "ddr_cwl");
+       if (s)
+               mp0.s.cwl = simple_strtoul(s, NULL, 0) - 5;
+
+       if (ddr_type == DDR4_DRAM) {
+               debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
+                     mp0.s.cwl + 9
+                     + ((mp0.s.cwl > 2) ? (mp0.s.cwl - 3) * 2 : 0), mp0.s.cwl);
+       } else {
+               debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
+                     mp0.s.cwl + 5, mp0.s.cwl);
+       }
+
+       mp0.s.mprloc = 0;
+       mp0.s.mpr = 0;
+       mp0.s.dll = (ddr_type == DDR4_DRAM);    /* 0 for DDR3 and 1 for DDR4 */
+       mp0.s.al = 0;
+       mp0.s.wlev = 0;         /* Read Only */
+       if (octeon_is_cpuid(OCTEON_CN70XX) || ddr_type == DDR4_DRAM)
+               mp0.s.tdqs = 0;
+       else
+               mp0.s.tdqs = 1;
+       mp0.s.qoff = 0;
+
+       s = lookup_env(priv, "ddr_cl");
+       if (s) {
+               cl = simple_strtoul(s, NULL, 0);
+               debug("CAS Latency                                   : %6d\n",
+                     cl);
+       }
+
+       if (ddr_type == DDR4_DRAM) {
+               mp0.s.cl = 0x0;
+               if (cl > 9)
+                       mp0.s.cl = 0x1;
+               if (cl > 10)
+                       mp0.s.cl = 0x2;
+               if (cl > 11)
+                       mp0.s.cl = 0x3;
+               if (cl > 12)
+                       mp0.s.cl = 0x4;
+               if (cl > 13)
+                       mp0.s.cl = 0x5;
+               if (cl > 14)
+                       mp0.s.cl = 0x6;
+               if (cl > 15)
+                       mp0.s.cl = 0x7;
+               if (cl > 16)
+                       mp0.s.cl = 0x8;
+               if (cl > 18)
+                       mp0.s.cl = 0x9;
+               if (cl > 20)
+                       mp0.s.cl = 0xA;
+               if (cl > 24)
+                       mp0.s.cl = 0xB;
+       } else {
+               mp0.s.cl = 0x2;
+               if (cl > 5)
+                       mp0.s.cl = 0x4;
+               if (cl > 6)
+                       mp0.s.cl = 0x6;
+               if (cl > 7)
+                       mp0.s.cl = 0x8;
+               if (cl > 8)
+                       mp0.s.cl = 0xA;
+               if (cl > 9)
+                       mp0.s.cl = 0xC;
+               if (cl > 10)
+                       mp0.s.cl = 0xE;
+               if (cl > 11)
+                       mp0.s.cl = 0x1;
+               if (cl > 12)
+                       mp0.s.cl = 0x3;
+               if (cl > 13)
+                       mp0.s.cl = 0x5;
+               if (cl > 14)
+                       mp0.s.cl = 0x7;
+               if (cl > 15)
+                       mp0.s.cl = 0x9;
+       }
+
+       mp0.s.rbt = 0;          /* Read Only. */
+       mp0.s.tm = 0;
+       mp0.s.dllr = 0;
+
+       param = divide_roundup(twr, tclk_psecs);
+
+       if (ddr_type == DDR4_DRAM) {    /* DDR4 */
+               mp0.s.wrp = 1;
+               if (param > 12)
+                       mp0.s.wrp = 2;
+               if (param > 14)
+                       mp0.s.wrp = 3;
+               if (param > 16)
+                       mp0.s.wrp = 4;
+               if (param > 18)
+                       mp0.s.wrp = 5;
+               if (param > 20)
+                       mp0.s.wrp = 6;
+               if (param > 24) /* RESERVED in DDR4 spec */
+                       mp0.s.wrp = 7;
+       } else {                /* DDR3 */
+               mp0.s.wrp = 1;
+               if (param > 5)
+                       mp0.s.wrp = 2;
+               if (param > 6)
+                       mp0.s.wrp = 3;
+               if (param > 7)
+                       mp0.s.wrp = 4;
+               if (param > 8)
+                       mp0.s.wrp = 5;
+               if (param > 10)
+                       mp0.s.wrp = 6;
+               if (param > 12)
+                       mp0.s.wrp = 7;
+       }
+
+       mp0.s.ppd = 0;
+
+       s = lookup_env(priv, "ddr_wrp");
+       if (s)
+               mp0.s.wrp = simple_strtoul(s, NULL, 0);
+
+       debug("%-45s : %d, [0x%x]\n",
+             "Write recovery for auto precharge WRP, [CSR]", param, mp0.s.wrp);
+
+       s = lookup_env_ull(priv, "ddr_modereg_params0");
+       if (s)
+               mp0.u64 = simple_strtoull(s, NULL, 0);
+
+       debug("MODEREG_PARAMS0                               : 0x%016llx\n",
+             mp0.u64);
+       lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);
+}
+
+static void lmc_modereg_params1(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_modereg_params1 mp1;
+       char *s;
+       int i;
+
+       mp1.u64 = odt_config[odt_idx].modereg_params1.u64;
+
+       /*
+        * Special request: mismatched DIMM support. Slot 0: 2-Rank,
+        * Slot 1: 1-Rank
+        */
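+       /*
+        * rank_mask bit layout: bit 0 = DIMM0 rank 0, bit 1 = DIMM0 rank 1,
+        * bit 2 = DIMM1 rank 0, bit 3 = DIMM1 rank 1. A value of 0x7 thus
+        * means a 2-rank DIMM in slot 0 together with a 1-rank DIMM in
+        * slot 1.
+        */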
+       if (rank_mask == 0x7) { /* 2-Rank, 1-Rank */
+               mp1.s.rtt_nom_00 = 0;
+               mp1.s.rtt_nom_01 = 3;   /* rttnom_40ohm */
+               mp1.s.rtt_nom_10 = 3;   /* rttnom_40ohm */
+               mp1.s.rtt_nom_11 = 0;
+               dyn_rtt_nom_mask = 0x6;
+       }
+
+       s = lookup_env(priv, "ddr_rtt_nom_mask");
+       if (s)
+               dyn_rtt_nom_mask = simple_strtoul(s, NULL, 0);
+
+       /*
+        * Save the original rtt_nom settings before sweeping through
+        * settings.
+        */
+       default_rtt_nom[0] = mp1.s.rtt_nom_00;
+       default_rtt_nom[1] = mp1.s.rtt_nom_01;
+       default_rtt_nom[2] = mp1.s.rtt_nom_10;
+       default_rtt_nom[3] = mp1.s.rtt_nom_11;
+
+       ddr_rtt_nom_auto = c_cfg->ddr_rtt_nom_auto;
+
+       for (i = 0; i < 4; ++i) {
+               u64 value;
+
+               s = lookup_env(priv, "ddr_rtt_nom_%1d%1d", !!(i & 2),
+                              !!(i & 1));
+               if (!s)
+                       s = lookup_env(priv, "ddr%d_rtt_nom_%1d%1d", if_num,
+                                      !!(i & 2), !!(i & 1));
+               if (s) {
+                       value = simple_strtoul(s, NULL, 0);
+                       mp1.u64 &= ~((u64)0x7 << (i * 12 + 9));
+                       mp1.u64 |= ((value & 0x7) << (i * 12 + 9));
+                       default_rtt_nom[i] = value;
+                       ddr_rtt_nom_auto = 0;
+               }
+       }
+
+       s = lookup_env(priv, "ddr_rtt_nom");
+       if (!s)
+               s = lookup_env(priv, "ddr%d_rtt_nom", if_num);
+       if (s) {
+               u64 value;
+
+               value = simple_strtoul(s, NULL, 0);
+
+               if (dyn_rtt_nom_mask & 1) {
+                       default_rtt_nom[0] = value;
+                       mp1.s.rtt_nom_00 = value;
+               }
+               if (dyn_rtt_nom_mask & 2) {
+                       default_rtt_nom[1] = value;
+                       mp1.s.rtt_nom_01 = value;
+               }
+               if (dyn_rtt_nom_mask & 4) {
+                       default_rtt_nom[2] = value;
+                       mp1.s.rtt_nom_10 = value;
+               }
+               if (dyn_rtt_nom_mask & 8) {
+                       default_rtt_nom[3] = value;
+                       mp1.s.rtt_nom_11 = value;
+               }
+
+               ddr_rtt_nom_auto = 0;
+       }
+
+       for (i = 0; i < 4; ++i) {
+               u64 value;
+
+               s = lookup_env(priv, "ddr_rtt_wr_%1d%1d", !!(i & 2), !!(i & 1));
+               if (!s)
+                       s = lookup_env(priv, "ddr%d_rtt_wr_%1d%1d", if_num,
+                                      !!(i & 2), !!(i & 1));
+               if (s) {
+                       value = simple_strtoul(s, NULL, 0);
+                       insrt_wr(&mp1.u64, i, value);
+               }
+       }
+
+       // Make sure 78XX pass 1 has valid RTT_WR settings, because
+       // configuration files may be set-up for later chips, and
+       // 78XX pass 1 supports no RTT_WR extension bits
+       if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
+               for (i = 0; i < 4; ++i) {
+                       // if 80 or undefined
+                       if (extr_wr(mp1.u64, i) > 3) {
+                               // FIXME? always insert 120
+                               insrt_wr(&mp1.u64, i, 1);
+                               debug("RTT_WR_%d%d set to 120 for CN78XX pass 1\n",
+                                     !!(i & 2), i & 1);
+                       }
+               }
+       }
+
+       s = lookup_env(priv, "ddr_dic");
+       if (s) {
+               u64 value = simple_strtoul(s, NULL, 0);
+
+               for (i = 0; i < 4; ++i) {
+                       mp1.u64 &= ~((u64)0x3 << (i * 12 + 7));
+                       mp1.u64 |= ((value & 0x3) << (i * 12 + 7));
+               }
+       }
+
+       for (i = 0; i < 4; ++i) {
+               u64 value;
+
+               s = lookup_env(priv, "ddr_dic_%1d%1d", !!(i & 2), !!(i & 1));
+               if (s) {
+                       value = simple_strtoul(s, NULL, 0);
+                       mp1.u64 &= ~((u64)0x3 << (i * 12 + 7));
+                       mp1.u64 |= ((value & 0x3) << (i * 12 + 7));
+               }
+       }
+
+       s = lookup_env_ull(priv, "ddr_modereg_params1");
+       if (s)
+               mp1.u64 = simple_strtoull(s, NULL, 0);
+
+       debug("RTT_NOM     %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
+             imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
+             imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
+             imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
+             imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
+             mp1.s.rtt_nom_11,
+             mp1.s.rtt_nom_10, mp1.s.rtt_nom_01, mp1.s.rtt_nom_00);
+
+       debug("RTT_WR      %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
+             imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)],
+             imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)],
+             imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)],
+             imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)],
+             extr_wr(mp1.u64, 3),
+             extr_wr(mp1.u64, 2), extr_wr(mp1.u64, 1), extr_wr(mp1.u64, 0));
+
+       debug("DIC         %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
+             imp_val->dic_ohms[mp1.s.dic_11],
+             imp_val->dic_ohms[mp1.s.dic_10],
+             imp_val->dic_ohms[mp1.s.dic_01],
+             imp_val->dic_ohms[mp1.s.dic_00],
+             mp1.s.dic_11, mp1.s.dic_10, mp1.s.dic_01, mp1.s.dic_00);
+
+       debug("MODEREG_PARAMS1                               : 0x%016llx\n",
+             mp1.u64);
+       lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num), mp1.u64);
+}
+
+static void lmc_modereg_params2(struct ddr_priv *priv)
+{
+       char *s;
+       int i;
+
+       if (ddr_type == DDR4_DRAM) {
+               union cvmx_lmcx_modereg_params2 mp2;
+
+               mp2.u64 = odt_config[odt_idx].modereg_params2.u64;
+
+               s = lookup_env(priv, "ddr_rtt_park");
+               if (s) {
+                       u64 value = simple_strtoul(s, NULL, 0);
+
+                       for (i = 0; i < 4; ++i) {
+                               mp2.u64 &= ~((u64)0x7 << (i * 10 + 0));
+                               mp2.u64 |= ((value & 0x7) << (i * 10 + 0));
+                       }
+               }
+
+               for (i = 0; i < 4; ++i) {
+                       u64 value;
+
+                       s = lookup_env(priv, "ddr_rtt_park_%1d%1d", !!(i & 2),
+                                      !!(i & 1));
+                       if (s) {
+                               value = simple_strtoul(s, NULL, 0);
+                               mp2.u64 &= ~((u64)0x7 << (i * 10 + 0));
+                               mp2.u64 |= ((value & 0x7) << (i * 10 + 0));
+                       }
+               }
+
+               s = lookup_env_ull(priv, "ddr_modereg_params2");
+               if (s)
+                       mp2.u64 = simple_strtoull(s, NULL, 0);
+
+               debug("RTT_PARK    %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
+                     imp_val->rtt_nom_ohms[mp2.s.rtt_park_11],
+                     imp_val->rtt_nom_ohms[mp2.s.rtt_park_10],
+                     imp_val->rtt_nom_ohms[mp2.s.rtt_park_01],
+                     imp_val->rtt_nom_ohms[mp2.s.rtt_park_00],
+                     mp2.s.rtt_park_11, mp2.s.rtt_park_10, mp2.s.rtt_park_01,
+                     mp2.s.rtt_park_00);
+
+               debug("%-45s :  0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE",
+                     mp2.s.vref_range_11,
+                     mp2.s.vref_range_10,
+                     mp2.s.vref_range_01, mp2.s.vref_range_00);
+
+               debug("%-45s :  0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE",
+                     mp2.s.vref_value_11,
+                     mp2.s.vref_value_10,
+                     mp2.s.vref_value_01, mp2.s.vref_value_00);
+
+               debug("MODEREG_PARAMS2                               : 0x%016llx\n",
+                     mp2.u64);
+               lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num), mp2.u64);
+       }
+}
+
+static void lmc_modereg_params3(struct ddr_priv *priv)
+{
+       char *s;
+
+       if (ddr_type == DDR4_DRAM) {
+               union cvmx_lmcx_modereg_params3 mp3;
+
+               mp3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num));
+               /* Disable as workaround to Errata 20547 */
+               mp3.s.rd_dbi = 0;
+               mp3.s.tccd_l = max(divide_roundup(ddr4_tccd_lmin, tclk_psecs),
+                                  5ull) - 4;
+
+               s = lookup_env(priv, "ddr_rd_preamble");
+               if (s)
+                       mp3.s.rd_preamble = !!simple_strtoul(s, NULL, 0);
+
+               if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
+                       int delay = 0;
+
+                       if (lranks_per_prank == 4 && ddr_hertz >= 1000000000)
+                               delay = 1;
+
+                       mp3.s.xrank_add_tccd_l = delay;
+                       mp3.s.xrank_add_tccd_s = delay;
+               }
+
+               lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num), mp3.u64);
+               debug("MODEREG_PARAMS3                               : 0x%016llx\n",
+                     mp3.u64);
+       }
+}
+
+static void lmc_nxm(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_nxm lmc_nxm;
+       int num_bits = row_lsb + row_bits + lranks_bits - 26;
+       char *s;
+
+       lmc_nxm.u64 = lmc_rd(priv, CVMX_LMCX_NXM(if_num));
+
+       /* .cn78xx. */
+       if (rank_mask & 0x1)
+               lmc_nxm.cn78xx.mem_msb_d0_r0 = num_bits;
+       if (rank_mask & 0x2)
+               lmc_nxm.cn78xx.mem_msb_d0_r1 = num_bits;
+       if (rank_mask & 0x4)
+               lmc_nxm.cn78xx.mem_msb_d1_r0 = num_bits;
+       if (rank_mask & 0x8)
+               lmc_nxm.cn78xx.mem_msb_d1_r1 = num_bits;
+
+       /* Set the mask for non-existent ranks. */
+       lmc_nxm.cn78xx.cs_mask = ~rank_mask & 0xff;
+
+       s = lookup_env_ull(priv, "ddr_nxm");
+       if (s)
+               lmc_nxm.u64 = simple_strtoull(s, NULL, 0);
+
+       debug("LMC_NXM                                       : 0x%016llx\n",
+             lmc_nxm.u64);
+       lmc_wr(priv, CVMX_LMCX_NXM(if_num), lmc_nxm.u64);
+}
+
+static void lmc_wodt_mask(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_wodt_mask wodt_mask;
+       char *s;
+
+       wodt_mask.u64 = odt_config[odt_idx].odt_mask;
+
+       s = lookup_env_ull(priv, "ddr_wodt_mask");
+       if (s)
+               wodt_mask.u64 = simple_strtoull(s, NULL, 0);
+
+       debug("WODT_MASK                                     : 0x%016llx\n",
+             wodt_mask.u64);
+       lmc_wr(priv, CVMX_LMCX_WODT_MASK(if_num), wodt_mask.u64);
+}
+
+static void lmc_rodt_mask(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_rodt_mask rodt_mask;
+       int rankx;
+       char *s;
+
+       rodt_mask.u64 = odt_config[odt_idx].rodt_ctl;
+
+       s = lookup_env_ull(priv, "ddr_rodt_mask");
+       if (s)
+               rodt_mask.u64 = simple_strtoull(s, NULL, 0);
+
+       debug("%-45s : 0x%016llx\n", "RODT_MASK", rodt_mask.u64);
+       lmc_wr(priv, CVMX_LMCX_RODT_MASK(if_num), rodt_mask.u64);
+
+       dyn_rtt_nom_mask = 0;
+       for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+               if (!(rank_mask & (1 << rankx)))
+                       continue;
+               dyn_rtt_nom_mask |= ((rodt_mask.u64 >> (8 * rankx)) & 0xff);
+       }
+       if (num_ranks == 4) {
+               /*
+                * Normally ODT1 is wired to rank 1. For quad-ranked DIMMs
+                * ODT1 is wired to the third rank (rank 2).  The mask,
+                * dyn_rtt_nom_mask, is used to indicate for which ranks
+                * to sweep RTT_NOM during read-leveling. Shift the bit
+                * from the ODT1 position over to the "ODT2" position so
+                * that the read-leveling analysis comes out right.
+                */
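+               /*
+                * Example (illustrative): if dyn_rtt_nom_mask is 0x3, the
+                * ODT1 bit (bit 1) is moved to bit 2, giving 0x5.
+                */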
+               int odt1_bit = dyn_rtt_nom_mask & 2;
+
+               dyn_rtt_nom_mask &= ~2;
+               dyn_rtt_nom_mask |= odt1_bit << 1;
+       }
+       debug("%-45s : 0x%02x\n", "DYN_RTT_NOM_MASK", dyn_rtt_nom_mask);
+}
+
+static void lmc_comp_ctl2(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_comp_ctl2 cc2;
+       char *s;
+
+       cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
+
+       cc2.cn78xx.dqx_ctl = odt_config[odt_idx].odt_ena;
+       /* Default 4=34.3 ohm */
+       cc2.cn78xx.ck_ctl = (c_cfg->ck_ctl == 0) ? 4 : c_cfg->ck_ctl;
+       /* Default 4=34.3 ohm */
+       cc2.cn78xx.cmd_ctl = (c_cfg->cmd_ctl == 0) ? 4 : c_cfg->cmd_ctl;
+       /* Default 4=34.3 ohm */
+       cc2.cn78xx.control_ctl = (c_cfg->ctl_ctl == 0) ? 4 : c_cfg->ctl_ctl;
+
+       ddr_rodt_ctl_auto = c_cfg->ddr_rodt_ctl_auto;
+       s = lookup_env(priv, "ddr_rodt_ctl_auto");
+       if (s)
+               ddr_rodt_ctl_auto = !!simple_strtoul(s, NULL, 0);
+
+       default_rodt_ctl = odt_config[odt_idx].qs_dic;
+       s = lookup_env(priv, "ddr_rodt_ctl");
+       if (!s)
+               s = lookup_env(priv, "ddr%d_rodt_ctl", if_num);
+       if (s) {
+               default_rodt_ctl = simple_strtoul(s, NULL, 0);
+               ddr_rodt_ctl_auto = 0;
+       }
+
+       cc2.cn70xx.rodt_ctl = default_rodt_ctl;
+
+       // if DDR4, force CK_CTL to 26 ohms if it is currently 34 ohms,
+       // and DCLK speed is 1 GHz or more...
+       if (ddr_type == DDR4_DRAM && cc2.s.ck_ctl == ddr4_driver_34_ohm &&
+           ddr_hertz >= 1000000000) {
+               // lowest for DDR4 is 26 ohms
+               cc2.s.ck_ctl = ddr4_driver_26_ohm;
+               debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CK_CTL] to %d, %d ohms\n",
+                     node, if_num, cc2.s.ck_ctl,
+                     imp_val->drive_strength[cc2.s.ck_ctl]);
+       }
+
+       // if DDR4, 2DPC, UDIMM, force CONTROL_CTL and CMD_CTL to 26 ohms,
+       // if DCLK speed is 1 GHz or more...
+       if (ddr_type == DDR4_DRAM && dimm_count == 2 &&
+           (spd_dimm_type == 2 || spd_dimm_type == 6) &&
+           ddr_hertz >= 1000000000) {
+               // lowest for DDR4 is 26 ohms
+               cc2.cn78xx.control_ctl = ddr4_driver_26_ohm;
+               // lowest for DDR4 is 26 ohms
+               cc2.cn78xx.cmd_ctl = ddr4_driver_26_ohm;
+               debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CONTROL_CTL,CMD_CTL] to %d, %d ohms\n",
+                     node, if_num, ddr4_driver_26_ohm,
+                     imp_val->drive_strength[ddr4_driver_26_ohm]);
+       }
+
+       s = lookup_env(priv, "ddr_ck_ctl");
+       if (s)
+               cc2.cn78xx.ck_ctl = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_cmd_ctl");
+       if (s)
+               cc2.cn78xx.cmd_ctl = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_control_ctl");
+       if (s)
+               cc2.cn70xx.control_ctl = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_dqx_ctl");
+       if (s)
+               cc2.cn78xx.dqx_ctl = simple_strtoul(s, NULL, 0);
+
+       debug("%-45s : %d, %d ohms\n", "DQX_CTL           ", cc2.cn78xx.dqx_ctl,
+             imp_val->drive_strength[cc2.cn78xx.dqx_ctl]);
+       debug("%-45s : %d, %d ohms\n", "CK_CTL            ", cc2.cn78xx.ck_ctl,
+             imp_val->drive_strength[cc2.cn78xx.ck_ctl]);
+       debug("%-45s : %d, %d ohms\n", "CMD_CTL           ", cc2.cn78xx.cmd_ctl,
+             imp_val->drive_strength[cc2.cn78xx.cmd_ctl]);
+       debug("%-45s : %d, %d ohms\n", "CONTROL_CTL       ",
+             cc2.cn78xx.control_ctl,
+             imp_val->drive_strength[cc2.cn78xx.control_ctl]);
+       debug("Read ODT_CTL                                  : 0x%x (%d ohms)\n",
+             cc2.cn78xx.rodt_ctl, imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);
+
+       debug("%-45s : 0x%016llx\n", "COMP_CTL2", cc2.u64);
+       lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
+}
+
+static void lmc_phy_ctl(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_phy_ctl phy_ctl;
+
+       phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
+       phy_ctl.s.ts_stagger = 0;
+       // FIXME: are there others TBD?
+       phy_ctl.s.dsk_dbg_overwrt_ena = 0;
+
+       if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) {
+               // C0 is TEN, C1 is A17
+               phy_ctl.s.c0_sel = 2;
+               phy_ctl.s.c1_sel = 2;
+               debug("N%d.LMC%d: 3DS: setting PHY_CTL[cx_csel] = %d\n",
+                     node, if_num, phy_ctl.s.c1_sel);
+       }
+
+       debug("PHY_CTL                                       : 0x%016llx\n",
+             phy_ctl.u64);
+       lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
+}
+
+static void lmc_ext_config(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_ext_config ext_cfg;
+       char *s;
+
+       ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
+       ext_cfg.s.vrefint_seq_deskew = 0;
+       ext_cfg.s.read_ena_bprch = 1;
+       ext_cfg.s.read_ena_fprch = 1;
+       ext_cfg.s.drive_ena_fprch = 1;
+       ext_cfg.s.drive_ena_bprch = 1;
+       // make sure this is OFF for all current chips
+       ext_cfg.s.invert_data = 0;
+
+       s = lookup_env(priv, "ddr_read_fprch");
+       if (s)
+               ext_cfg.s.read_ena_fprch = strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_read_bprch");
+       if (s)
+               ext_cfg.s.read_ena_bprch = strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_drive_fprch");
+       if (s)
+               ext_cfg.s.drive_ena_fprch = strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_drive_bprch");
+       if (s)
+               ext_cfg.s.drive_ena_bprch = strtoul(s, NULL, 0);
+
+       if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) {
+               ext_cfg.s.dimm0_cid = lranks_bits;
+               ext_cfg.s.dimm1_cid = lranks_bits;
+               debug("N%d.LMC%d: 3DS: setting EXT_CONFIG[dimmx_cid] = %d\n",
+                     node, if_num, ext_cfg.s.dimm0_cid);
+       }
+
+       lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64);
+       debug("%-45s : 0x%016llx\n", "EXT_CONFIG", ext_cfg.u64);
+}
+
+static void lmc_ext_config2(struct ddr_priv *priv)
+{
+       char *s;
+
+       // NOTE: all chips have this register, but not necessarily the
+       // fields we modify...
+       if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) &&
+           !octeon_is_cpuid(OCTEON_CN73XX)) {
+               union cvmx_lmcx_ext_config2 ext_cfg2;
+               int value = 1;  // default to 1
+
+               ext_cfg2.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG2(if_num));
+
+               s = lookup_env(priv, "ddr_ext2_delay_unload");
+               if (s)
+                       value = !!simple_strtoul(s, NULL, 0);
+
+               ext_cfg2.s.delay_unload_r0 = value;
+               ext_cfg2.s.delay_unload_r1 = value;
+               ext_cfg2.s.delay_unload_r2 = value;
+               ext_cfg2.s.delay_unload_r3 = value;
+
+               lmc_wr(priv, CVMX_LMCX_EXT_CONFIG2(if_num), ext_cfg2.u64);
+               debug("%-45s : 0x%016llx\n", "EXT_CONFIG2", ext_cfg2.u64);
+       }
+}
+
+static void lmc_dimm01_params_loop(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_dimmx_params dimm_p;
+       int dimmx = didx;
+       char *s;
+       int rc;
+       int i;
+
+       dimm_p.u64 = lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num));
+
+       if (ddr_type == DDR4_DRAM) {
+               union cvmx_lmcx_dimmx_ddr4_params0 ddr4_p0;
+               union cvmx_lmcx_dimmx_ddr4_params1 ddr4_p1;
+               union cvmx_lmcx_ddr4_dimm_ctl ddr4_ctl;
+
+               dimm_p.s.rc0 = 0;
+               dimm_p.s.rc1 = 0;
+               dimm_p.s.rc2 = 0;
+
+               rc = read_spd(&dimm_config_table[didx], 0,
+                             DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL);
+               dimm_p.s.rc3 = (rc >> 4) & 0xf;
+               dimm_p.s.rc4 = ((rc >> 0) & 0x3) << 2;
+               dimm_p.s.rc4 |= ((rc >> 2) & 0x3) << 0;
+
+               rc = read_spd(&dimm_config_table[didx], 0,
+                             DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK);
+               dimm_p.s.rc5 = ((rc >> 0) & 0x3) << 2;
+               dimm_p.s.rc5 |= ((rc >> 2) & 0x3) << 0;
+
+               dimm_p.s.rc6 = 0;
+               dimm_p.s.rc7 = 0;
+               dimm_p.s.rc8 = 0;
+               dimm_p.s.rc9 = 0;
+
+               /*
+                * rc10               DDR4 RDIMM Operating Speed
+                * ===  ===================================================
+                *  0               tclk_psecs >= 1250 psec DDR4-1600 (1250 ps)
+                *  1   1250 psec > tclk_psecs >= 1071 psec DDR4-1866 (1071 ps)
+                *  2   1071 psec > tclk_psecs >=  938 psec DDR4-2133 ( 938 ps)
+                *  3    938 psec > tclk_psecs >=  833 psec DDR4-2400 ( 833 ps)
+                *  4    833 psec > tclk_psecs >=  750 psec DDR4-2666 ( 750 ps)
+                *  5    750 psec > tclk_psecs >=  625 psec DDR4-3200 ( 625 ps)
+                */
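+               /*
+                * Example (illustrative): at tclk_psecs = 833 (DDR4-2400) the
+                * assignments below leave rc10 = 3, matching the
+                * "938 psec > tclk_psecs >= 833 psec" row above.
+                */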
+               dimm_p.s.rc10 = 0;
+               if (tclk_psecs < 1250)
+                       dimm_p.s.rc10 = 1;
+               if (tclk_psecs < 1071)
+                       dimm_p.s.rc10 = 2;
+               if (tclk_psecs < 938)
+                       dimm_p.s.rc10 = 3;
+               if (tclk_psecs < 833)
+                       dimm_p.s.rc10 = 4;
+               if (tclk_psecs < 750)
+                       dimm_p.s.rc10 = 5;
+
+               dimm_p.s.rc11 = 0;
+               dimm_p.s.rc12 = 0;
+               /* 0=LRDIMM, 1=RDIMM */
+               dimm_p.s.rc13 = (spd_dimm_type == 4) ? 0 : 4;
+               dimm_p.s.rc13 |= (ddr_type == DDR4_DRAM) ?
+                       (spd_addr_mirror << 3) : 0;
+               dimm_p.s.rc14 = 0;
+               dimm_p.s.rc15 = 0;      /* 1 nCK latency adder */
+
+               ddr4_p0.u64 = 0;
+
+               ddr4_p0.s.rc8x = 0;
+               ddr4_p0.s.rc7x = 0;
+               ddr4_p0.s.rc6x = 0;
+               ddr4_p0.s.rc5x = 0;
+               ddr4_p0.s.rc4x = 0;
+
+               ddr4_p0.s.rc3x = compute_rc3x(tclk_psecs);
+
+               ddr4_p0.s.rc2x = 0;
+               ddr4_p0.s.rc1x = 0;
+
+               ddr4_p1.u64 = 0;
+
+               ddr4_p1.s.rcbx = 0;
+               ddr4_p1.s.rcax = 0;
+               ddr4_p1.s.rc9x = 0;
+
+               ddr4_ctl.u64 = 0;
+               ddr4_ctl.cn70xx.ddr4_dimm0_wmask = 0x004;
+               ddr4_ctl.cn70xx.ddr4_dimm1_wmask =
+                   (dimm_count > 1) ? 0x004 : 0x0000;
+
+               /*
+                * Handle any overrides from envvars here...
+                */
+               s = lookup_env(priv, "ddr_ddr4_params0");
+               if (s)
+                       ddr4_p0.u64 = simple_strtoul(s, NULL, 0);
+
+               s = lookup_env(priv, "ddr_ddr4_params1");
+               if (s)
+                       ddr4_p1.u64 = simple_strtoul(s, NULL, 0);
+
+               s = lookup_env(priv, "ddr_ddr4_dimm_ctl");
+               if (s)
+                       ddr4_ctl.u64 = simple_strtoul(s, NULL, 0);
+
+               for (i = 0; i < 11; ++i) {
+                       u64 value;
+
+                       s = lookup_env(priv, "ddr_ddr4_rc%1xx", i + 1);
+                       if (s) {
+                               value = simple_strtoul(s, NULL, 0);
+                               if (i < 8) {
+                                       ddr4_p0.u64 &= ~((u64)0xff << (i * 8));
+                                       ddr4_p0.u64 |= (value << (i * 8));
+                               } else {
+                                       ddr4_p1.u64 &=
+                                           ~((u64)0xff << ((i - 8) * 8));
+                                       ddr4_p1.u64 |= (value << ((i - 8) * 8));
+                               }
+                       }
+               }
+
+               /*
+                * write the final CSR values
+                */
+               lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS0(dimmx, if_num),
+                      ddr4_p0.u64);
+
+               lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), ddr4_ctl.u64);
+
+               lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS1(dimmx, if_num),
+                      ddr4_p1.u64);
+
+               debug("DIMM%d Register Control Words        RCBx:RC1x : %x %x %x %x %x %x %x %x %x %x %x\n",
+                     dimmx, ddr4_p1.s.rcbx, ddr4_p1.s.rcax,
+                     ddr4_p1.s.rc9x, ddr4_p0.s.rc8x,
+                     ddr4_p0.s.rc7x, ddr4_p0.s.rc6x,
+                     ddr4_p0.s.rc5x, ddr4_p0.s.rc4x,
+                     ddr4_p0.s.rc3x, ddr4_p0.s.rc2x, ddr4_p0.s.rc1x);
+
+       } else {
+               rc = read_spd(&dimm_config_table[didx], 0, 69);
+               dimm_p.s.rc0 = (rc >> 0) & 0xf;
+               dimm_p.s.rc1 = (rc >> 4) & 0xf;
+
+               rc = read_spd(&dimm_config_table[didx], 0, 70);
+               dimm_p.s.rc2 = (rc >> 0) & 0xf;
+               dimm_p.s.rc3 = (rc >> 4) & 0xf;
+
+               rc = read_spd(&dimm_config_table[didx], 0, 71);
+               dimm_p.s.rc4 = (rc >> 0) & 0xf;
+               dimm_p.s.rc5 = (rc >> 4) & 0xf;
+
+               rc = read_spd(&dimm_config_table[didx], 0, 72);
+               dimm_p.s.rc6 = (rc >> 0) & 0xf;
+               dimm_p.s.rc7 = (rc >> 4) & 0xf;
+
+               rc = read_spd(&dimm_config_table[didx], 0, 73);
+               dimm_p.s.rc8 = (rc >> 0) & 0xf;
+               dimm_p.s.rc9 = (rc >> 4) & 0xf;
+
+               rc = read_spd(&dimm_config_table[didx], 0, 74);
+               dimm_p.s.rc10 = (rc >> 0) & 0xf;
+               dimm_p.s.rc11 = (rc >> 4) & 0xf;
+
+               rc = read_spd(&dimm_config_table[didx], 0, 75);
+               dimm_p.s.rc12 = (rc >> 0) & 0xf;
+               dimm_p.s.rc13 = (rc >> 4) & 0xf;
+
+               rc = read_spd(&dimm_config_table[didx], 0, 76);
+               dimm_p.s.rc14 = (rc >> 0) & 0xf;
+               dimm_p.s.rc15 = (rc >> 4) & 0xf;
+
+               s = ddr_getenv_debug(priv, "ddr_clk_drive");
+               if (s) {
+                       if (strcmp(s, "light") == 0)
+                               dimm_p.s.rc5 = 0x0;     /* Light Drive */
+                       if (strcmp(s, "moderate") == 0)
+                               dimm_p.s.rc5 = 0x5;     /* Moderate Drive */
+                       if (strcmp(s, "strong") == 0)
+                               dimm_p.s.rc5 = 0xA;     /* Strong Drive */
+                       printf("Parameter found in environment. ddr_clk_drive = %s\n",
+                              s);
+               }
+
+               s = ddr_getenv_debug(priv, "ddr_cmd_drive");
+               if (s) {
+                       if (strcmp(s, "light") == 0)
+                               dimm_p.s.rc3 = 0x0;     /* Light Drive */
+                       if (strcmp(s, "moderate") == 0)
+                               dimm_p.s.rc3 = 0x5;     /* Moderate Drive */
+                       if (strcmp(s, "strong") == 0)
+                               dimm_p.s.rc3 = 0xA;     /* Strong Drive */
+                       printf("Parameter found in environment. ddr_cmd_drive = %s\n",
+                              s);
+               }
+
+               s = ddr_getenv_debug(priv, "ddr_ctl_drive");
+               if (s) {
+                       if (strcmp(s, "light") == 0)
+                               dimm_p.s.rc4 = 0x0;     /* Light Drive */
+                       if (strcmp(s, "moderate") == 0)
+                               dimm_p.s.rc4 = 0x5;     /* Moderate Drive */
+                       printf("Parameter found in environment. ddr_ctl_drive = %s\n",
+                              s);
+               }
+
+               /*
+                * rc10               DDR3 RDIMM Operating Speed
+                * ==   =====================================================
+                *  0               tclk_psecs >= 2500 psec DDR3/DDR3L-800 def
+                *  1   2500 psec > tclk_psecs >= 1875 psec DDR3/DDR3L-1066
+                *  2   1875 psec > tclk_psecs >= 1500 psec DDR3/DDR3L-1333
+                *  3   1500 psec > tclk_psecs >= 1250 psec DDR3/DDR3L-1600
+                *  4   1250 psec > tclk_psecs >= 1071 psec DDR3-1866
+                */
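+               /*
+                * Example (illustrative): at tclk_psecs = 1500 (DDR3-1333) the
+                * assignments below leave rc10 = 2, matching the
+                * "1875 psec > tclk_psecs >= 1500 psec" row above.
+                */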
+               dimm_p.s.rc10 = 0;
+               if (tclk_psecs < 2500)
+                       dimm_p.s.rc10 = 1;
+               if (tclk_psecs < 1875)
+                       dimm_p.s.rc10 = 2;
+               if (tclk_psecs < 1500)
+                       dimm_p.s.rc10 = 3;
+               if (tclk_psecs < 1250)
+                       dimm_p.s.rc10 = 4;
+       }
+
+       s = lookup_env(priv, "ddr_dimmx_params", i);
+       if (s)
+               dimm_p.u64 = simple_strtoul(s, NULL, 0);
+
+       for (i = 0; i < 16; ++i) {
+               u64 value;
+
+               s = lookup_env(priv, "ddr_rc%d", i);
+               if (s) {
+                       value = simple_strtoul(s, NULL, 0);
+                       dimm_p.u64 &= ~((u64)0xf << (i * 4));
+                       dimm_p.u64 |= (value << (i * 4));
+               }
+       }
+
+       lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num), dimm_p.u64);
+
+       debug("DIMM%d Register Control Words         RC15:RC0 : %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",
+             dimmx, dimm_p.s.rc15, dimm_p.s.rc14, dimm_p.s.rc13,
+             dimm_p.s.rc12, dimm_p.s.rc11, dimm_p.s.rc10,
+             dimm_p.s.rc9, dimm_p.s.rc8, dimm_p.s.rc7,
+             dimm_p.s.rc6, dimm_p.s.rc5, dimm_p.s.rc4,
+             dimm_p.s.rc3, dimm_p.s.rc2, dimm_p.s.rc1, dimm_p.s.rc0);
+
+       // FIXME: recognize a DDR3 RDIMM with 4 ranks and 2 registers,
+       // and treat it specially
+       if (ddr_type == DDR3_DRAM && num_ranks == 4 &&
+           spd_rdimm_registers == 2 && dimmx == 0) {
+               debug("DDR3: Copying DIMM0_PARAMS to DIMM1_PARAMS for pseudo-DIMM #1...\n");
+               lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(1, if_num), dimm_p.u64);
+       }
+}
+
+static void lmc_dimm01_params(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_dimm_ctl dimm_ctl;
+       char *s;
+
+       if (spd_rdimm) {
+               for (didx = 0; didx < (unsigned int)dimm_count; ++didx)
+                       lmc_dimm01_params_loop(priv);
+
+               if (ddr_type == DDR4_DRAM) {
+                       /* LMC0_DIMM_CTL */
+                       dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
+                       dimm_ctl.s.dimm0_wmask = 0xdf3f;
+                       dimm_ctl.s.dimm1_wmask =
+                           (dimm_count > 1) ? 0xdf3f : 0x0000;
+                       dimm_ctl.s.tcws = 0x4e0;
+                       dimm_ctl.s.parity = c_cfg->parity;
+
+                       s = lookup_env(priv, "ddr_dimm0_wmask");
+                       if (s) {
+                               dimm_ctl.s.dimm0_wmask =
+                                   simple_strtoul(s, NULL, 0);
+                       }
+
+                       s = lookup_env(priv, "ddr_dimm1_wmask");
+                       if (s) {
+                               dimm_ctl.s.dimm1_wmask =
+                                   simple_strtoul(s, NULL, 0);
+                       }
+
+                       s = lookup_env(priv, "ddr_dimm_ctl_parity");
+                       if (s)
+                               dimm_ctl.s.parity = simple_strtoul(s, NULL, 0);
+
+                       s = lookup_env(priv, "ddr_dimm_ctl_tcws");
+                       if (s)
+                               dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0);
+
+                       debug("LMC DIMM_CTL                                  : 0x%016llx\n",
+                             dimm_ctl.u64);
+                       lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
+
+                       /* Init RCW */
+                       oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
+
+                       /* Write RC0D last */
+                       dimm_ctl.s.dimm0_wmask = 0x2000;
+                       dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ?
+                               0x2000 : 0x0000;
+                       debug("LMC DIMM_CTL                                  : 0x%016llx\n",
+                             dimm_ctl.u64);
+                       lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
+
+                       /*
+                        * Don't write any extended registers the second time
+                        */
+                       lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), 0);
+
+                       /* Init RCW */
+                       oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
+               } else {
+                       /* LMC0_DIMM_CTL */
+                       dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
+                       dimm_ctl.s.dimm0_wmask = 0xffff;
+                       // FIXME: recognize a DDR3 RDIMM with 4 ranks and 2
+                       // registers, and treat it specially
+                       if (num_ranks == 4 && spd_rdimm_registers == 2) {
+                               debug("DDR3: Activating DIMM_CTL[dimm1_mask] bits...\n");
+                               dimm_ctl.s.dimm1_wmask = 0xffff;
+                       } else {
+                               dimm_ctl.s.dimm1_wmask =
+                                   (dimm_count > 1) ? 0xffff : 0x0000;
+                       }
+                       dimm_ctl.s.tcws = 0x4e0;
+                       dimm_ctl.s.parity = c_cfg->parity;
+
+                       s = lookup_env(priv, "ddr_dimm0_wmask");
+                       if (s) {
+                               dimm_ctl.s.dimm0_wmask =
+                                   simple_strtoul(s, NULL, 0);
+                       }
+
+                       s = lookup_env(priv, "ddr_dimm1_wmask");
+                       if (s) {
+                               dimm_ctl.s.dimm1_wmask =
+                                   simple_strtoul(s, NULL, 0);
+                       }
+
+                       s = lookup_env(priv, "ddr_dimm_ctl_parity");
+                       if (s)
+                               dimm_ctl.s.parity = simple_strtoul(s, NULL, 0);
+
+                       s = lookup_env(priv, "ddr_dimm_ctl_tcws");
+                       if (s)
+                               dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0);
+
+                       debug("LMC DIMM_CTL                                  : 0x%016llx\n",
+                             dimm_ctl.u64);
+                       lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
+
+                       /* Init RCW */
+                       oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
+               }
+
+       } else {
+               /* Disable register control writes for unbuffered */
+               union cvmx_lmcx_dimm_ctl dimm_ctl;
+
+               dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
+               dimm_ctl.s.dimm0_wmask = 0;
+               dimm_ctl.s.dimm1_wmask = 0;
+               lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
+       }
+}
+
+static int lmc_rank_init(struct ddr_priv *priv)
+{
+       char *s;
+
+       if (enable_by_rank_init) {
+               by_rank = 3;
+               saved_rank_mask = rank_mask;
+       }
+
+start_by_rank_init:
+
+       if (enable_by_rank_init) {
+               rank_mask = (1 << by_rank);
+               if (!(rank_mask & saved_rank_mask))
+                       goto end_by_rank_init;
+               if (by_rank == 0)
+                       rank_mask = saved_rank_mask;
+
+               debug("\n>>>>> BY_RANK: starting rank %d with mask 0x%02x\n\n",
+                     by_rank, rank_mask);
+       }
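+       /*
+        * Example: with saved_rank_mask = 0xf (quad-rank), the by-rank passes
+        * use rank_mask = 0x8, 0x4 and 0x2 in turn, and the final pass at
+        * by_rank == 0 restores the full 0xf mask.
+        */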
+
+       /*
+        * Comments (steps 3 through 5) continue in oct3_ddr3_seq()
+        */
+       union cvmx_lmcx_modereg_params0 mp0;
+
+       if (ddr_memory_preserved(priv)) {
+               /*
+                * Contents are being preserved. Take DRAM out of self-refresh
+                * first. Then init steps can proceed normally.
+                */
+               /* self-refresh exit */
+               oct3_ddr3_seq(priv, rank_mask, if_num, 3);
+       }
+
+       mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
+       mp0.s.dllr = 1;         /* Set during first init sequence */
+       lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);
+
+       ddr_init_seq(priv, rank_mask, if_num);
+
+       mp0.s.dllr = 0;         /* Clear for normal operation */
+       lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);
+
+       if (spd_rdimm && ddr_type == DDR4_DRAM &&
+           octeon_is_cpuid(OCTEON_CN7XXX)) {
+               debug("Running init sequence 1\n");
+               change_rdimm_mpr_pattern(priv, rank_mask, if_num, dimm_count);
+       }
+
+       memset(lanes, 0, sizeof(lanes));
+       for (lane = 0; lane < last_lane; lane++) {
+               // init all lanes to reset value
+               dac_settings[lane] = 127;
+       }
+
+       // FIXME: disable internal VREF if deskew is disabled?
+       if (disable_deskew_training) {
+               debug("N%d.LMC%d: internal VREF Training disabled, leaving them in RESET.\n",
+                     node, if_num);
+               num_samples = 0;
+       } else if (ddr_type == DDR4_DRAM &&
+                  !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
+               num_samples = DEFAULT_DAC_SAMPLES;
+       } else {
+               // if DDR3 or no ability to write DAC values
+               num_samples = 1;
+       }
+
+perform_internal_vref_training:
+
+       total_dac_eval_retries = 0;
+       dac_eval_exhausted = 0;
+
+       for (sample = 0; sample < num_samples; sample++) {
+               dac_eval_retries = 0;
+
+               // make offset and internal vref training repeatable
+               do {
+                       /*
+                        * 6.9.8 LMC Offset Training
+                        * LMC requires input-receiver offset training.
+                        */
+                       perform_offset_training(priv, rank_mask, if_num);
+
+                       /*
+                        * 6.9.9 LMC Internal vref Training
+                        * LMC requires input-reference-voltage training.
+                        */
+                       perform_internal_vref_training(priv, rank_mask, if_num);
+
+                       // read and maybe display the DAC values for a sample
+                       read_dac_dbi_settings(priv, if_num, /*DAC*/ 1,
+                                             dac_settings);
+                       if (num_samples == 1 || ddr_verbose(priv)) {
+                               display_dac_dbi_settings(if_num, /*DAC*/ 1,
+                                                        use_ecc, dac_settings,
+                                                        "Internal VREF");
+                       }
+
+                       // for DDR4, evaluate the DAC settings and retry
+                       // if any issues
+                       if (ddr_type == DDR4_DRAM) {
+                               if (evaluate_dac_settings
+                                   (if_64b, use_ecc, dac_settings)) {
+                                       dac_eval_retries += 1;
+                                       if (dac_eval_retries >
+                                           DAC_RETRIES_LIMIT) {
+                                               debug("N%d.LMC%d: DDR4 internal VREF DAC settings: retries exhausted; continuing...\n",
+                                                     node, if_num);
+                                               dac_eval_exhausted += 1;
+                                       } else {
+                                               debug("N%d.LMC%d: DDR4 internal VREF DAC settings inconsistent; retrying....\n",
+                                                     node, if_num);
+                                               total_dac_eval_retries += 1;
+                                               // try another sample
+                                               continue;
+                                       }
+                               }
+
+                               // taking multiple samples, otherwise do nothing
+                               if (num_samples > 1) {
+                                       // good sample or exhausted retries,
+                                       // record it
+                                       for (lane = 0; lane < last_lane;
+                                            lane++) {
+                                               lanes[lane].bytes[sample] =
+                                                   dac_settings[lane];
+                                       }
+                               }
+                       }
+                       // done if DDR3, or good sample, or exhausted retries
+                       break;
+               } while (1);
+       }
+
+       if (ddr_type == DDR4_DRAM && dac_eval_exhausted > 0) {
+               debug("N%d.LMC%d: DDR internal VREF DAC settings: total retries %d, exhausted %d\n",
+                     node, if_num, total_dac_eval_retries, dac_eval_exhausted);
+       }
+
+       if (num_samples > 1) {
+               debug("N%d.LMC%d: DDR4 internal VREF DAC settings: processing multiple samples...\n",
+                     node, if_num);
+
+               for (lane = 0; lane < last_lane; lane++) {
+                       dac_settings[lane] =
+                           process_samples_average(&lanes[lane].bytes[0],
+                                                   num_samples, if_num, lane);
+               }
+               display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc,
+                                        dac_settings, "Averaged VREF");
+
+               // finally, write the final DAC values
+               for (lane = 0; lane < last_lane; lane++) {
+                       load_dac_override(priv, if_num, dac_settings[lane],
+                                         lane);
+               }
+       }
+
+       // allow override of any byte-lane internal VREF
+       int overrode_vref_dac = 0;
+
+       for (lane = 0; lane < last_lane; lane++) {
+               s = lookup_env(priv, "ddr%d_vref_dac_byte%d", if_num, lane);
+               if (s) {
+                       dac_settings[lane] = simple_strtoul(s, NULL, 0);
+                       overrode_vref_dac = 1;
+                       // finally, write the new DAC value
+                       load_dac_override(priv, if_num, dac_settings[lane],
+                                         lane);
+               }
+       }
+       if (overrode_vref_dac) {
+               display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc,
+                                        dac_settings, "Override VREF");
+       }
+
+       // as a second step, after internal VREF training, before starting
+       // deskew training:
+       // for DDR3 and OCTEON3 not O78 pass 1.x, override the DAC setting
+       // to 127
+       if (ddr_type == DDR3_DRAM && !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) &&
+           !disable_deskew_training) {
+               load_dac_override(priv, if_num, 127, /* all */ 0x0A);
+               debug("N%d.LMC%d: Overriding DDR3 internal VREF DAC settings to 127.\n",
+                     node, if_num);
+       }
+
+       /*
+        * 4.8.8 LMC Deskew Training
+        *
+        * LMC requires input-read-data deskew training.
+        */
+       if (!disable_deskew_training) {
+               deskew_training_errors =
+                   perform_deskew_training(priv, rank_mask, if_num,
+                                           spd_rawcard_aorb);
+
+               // All the Deskew lock and saturation retries (may) have
+               // been done, but we ended up with nibble errors; so,
+               // as a last ditch effort, try the Internal vref
+               // Training again...
+               if (deskew_training_errors) {
+                       if (internal_retries <
+                           DEFAULT_INTERNAL_VREF_TRAINING_LIMIT) {
+                               internal_retries++;
+                               debug("N%d.LMC%d: Deskew training results still unsettled - retrying internal vref training (%d)\n",
+                                     node, if_num, internal_retries);
+                               goto perform_internal_vref_training;
+                       } else {
+                               if (restart_if_dsk_incomplete) {
+                                       debug("N%d.LMC%d: INFO: Deskew training incomplete - %d retries exhausted, Restarting LMC init...\n",
+                                             node, if_num, internal_retries);
+                                       return -EAGAIN;
+                               }
+                               debug("N%d.LMC%d: Deskew training incomplete - %d retries exhausted, but continuing...\n",
+                                     node, if_num, internal_retries);
+                       }
+               }               /* if (deskew_training_errors) */
+
+               // FIXME: treat this as the final DSK print from now on,
+               // and print if VBL_NORM or above also, save the results
+               // of the original training in case we want them later
+               validate_deskew_training(priv, rank_mask, if_num,
+                                        &deskew_training_results, 1);
+       } else {                /* if (! disable_deskew_training) */
+               debug("N%d.LMC%d: Deskew Training disabled, printing settings before HWL.\n",
+                     node, if_num);
+               validate_deskew_training(priv, rank_mask, if_num,
+                                        &deskew_training_results, 1);
+       }                       /* if (! disable_deskew_training) */
+
+       if (enable_by_rank_init) {
+               read_dac_dbi_settings(priv, if_num, /*dac */ 1,
+                                     &rank_dac[by_rank].bytes[0]);
+               get_deskew_settings(priv, if_num, &rank_dsk[by_rank]);
+               debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank);
+       }
+
+end_by_rank_init:
+
+       if (enable_by_rank_init) {
+               //debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank);
+
+               by_rank--;
+               if (by_rank >= 0)
+                       goto start_by_rank_init;
+
+               rank_mask = saved_rank_mask;
+               ddr_init_seq(priv, rank_mask, if_num);
+
+               process_by_rank_dac(priv, if_num, rank_mask, rank_dac);
+               process_by_rank_dsk(priv, if_num, rank_mask, rank_dsk);
+
+               // FIXME: set this to prevent later checking!!!
+               disable_deskew_training = 1;
+
+               debug("\n>>>>> BY_RANK: FINISHED!!\n\n");
+       }
+
+       return 0;
+}
+
+static void lmc_config_2(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_config lmc_config;
+       int save_ref_zqcs_int;
+       u64 temp_delay_usecs;
+
+       lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+
+       /*
+        * Temporarily select the minimum ZQCS interval and wait
+        * long enough for a few ZQCS calibrations to occur.  This
+        * should ensure that the calibration circuitry is
+        * stabilized before read/write leveling occurs.
+        */
+       if (octeon_is_cpuid(OCTEON_CN7XXX)) {
+               save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int;
+               /* set smallest interval */
+               lmc_config.cn78xx.ref_zqcs_int = 1 | (32 << 7);
+       } else {
+               save_ref_zqcs_int = lmc_config.cn63xx.ref_zqcs_int;
+               /* set smallest interval */
+               lmc_config.cn63xx.ref_zqcs_int = 1 | (32 << 7);
+       }
+       lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
+       lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+
+       /*
+        * Compute an appropriate delay based on the current ZQCS
+        * interval. The delay should be long enough for the
+        * current ZQCS delay counter to expire plus ten of the
+        * minimum intervals to ensure that some calibrations
+        * occur.
+        */
+       temp_delay_usecs = (((u64)save_ref_zqcs_int >> 7) * tclk_psecs *
+                           100 * 512 * 128) / (10000 * 10000) + 10 *
+               ((u64)32 * tclk_psecs * 100 * 512 * 128) / (10000 * 10000);
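+       /*
+        * For example, with tclk_psecs = 1250 one minimal ZQCS interval is
+        * 32 * 512 * 128 tCK, roughly 2.6 ms, so the "ten minimum intervals"
+        * term alone contributes about 26000 usecs to the wait below.
+        */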
+
+       debug("Waiting %lld usecs for ZQCS calibrations to start\n",
+             temp_delay_usecs);
+       udelay(temp_delay_usecs);
+
+       if (octeon_is_cpuid(OCTEON_CN7XXX)) {
+               /* Restore saved interval */
+               lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int;
+       } else {
+               /* Restore saved interval */
+               lmc_config.cn63xx.ref_zqcs_int = save_ref_zqcs_int;
+       }
+
+       lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
+       lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+}
+
+static union cvmx_lmcx_wlevel_ctl wl_ctl __section(".data");
+static union cvmx_lmcx_wlevel_rankx wl_rank __section(".data");
+static union cvmx_lmcx_modereg_params1 mp1 __section(".data");
+
+static int wl_mask[9] __section(".data");
+static int byte_idx __section(".data");
+static int ecc_ena __section(".data");
+static int wl_roundup __section(".data");
+static int save_mode32b __section(".data");
+static int disable_hwl_validity __section(".data");
+static int default_wl_rtt_nom __section(".data");
+static int wl_pbm_pump __section(".data");
+
+static void lmc_write_leveling_loop(struct ddr_priv *priv, int rankx)
+{
+       int wloop = 0;
+       // retries per sample for HW-related issues with bitmasks or values
+       int wloop_retries = 0;
+       int wloop_retries_total = 0;
+       int wloop_retries_exhausted = 0;
+#define WLOOP_RETRIES_DEFAULT 5
+       int wl_val_err;
+       int wl_mask_err_rank = 0;
+       int wl_val_err_rank = 0;
+       // array to collect counts of byte-lane values
+       // assume low-order 3 bits and even, so really only 2-bit values
+       struct wlevel_bitcnt wl_bytes[9], wl_bytes_extra[9];
+       int extra_bumps, extra_mask;
+       int rank_nom = 0;
+
+       if (!(rank_mask & (1 << rankx)))
+               return;
+
+       if (match_wl_rtt_nom) {
+               if (rankx == 0)
+                       rank_nom = mp1.s.rtt_nom_00;
+               if (rankx == 1)
+                       rank_nom = mp1.s.rtt_nom_01;
+               if (rankx == 2)
+                       rank_nom = mp1.s.rtt_nom_10;
+               if (rankx == 3)
+                       rank_nom = mp1.s.rtt_nom_11;
+
+               debug("N%d.LMC%d.R%d: Setting WLEVEL_CTL[rtt_nom] to %d (%d)\n",
+                     node, if_num, rankx, rank_nom,
+                     imp_val->rtt_nom_ohms[rank_nom]);
+       }
+
+       memset(wl_bytes, 0, sizeof(wl_bytes));
+       memset(wl_bytes_extra, 0, sizeof(wl_bytes_extra));
+
+       // restructure the looping so we can keep trying until we get the
+       // samples we want
+       while (wloop < wl_loops) {
+               wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num));
+
+               wl_ctl.cn78xx.rtt_nom =
+                   (default_wl_rtt_nom > 0) ? (default_wl_rtt_nom - 1) : 7;
+
+               if (match_wl_rtt_nom) {
+                       wl_ctl.cn78xx.rtt_nom =
+                           (rank_nom > 0) ? (rank_nom - 1) : 7;
+               }
+
+               /* Clear write-level delays */
+               lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), 0);
+
+               wl_mask_err = 0;        /* Reset error counters */
+               wl_val_err = 0;
+
+               for (byte_idx = 0; byte_idx < 9; ++byte_idx)
+                       wl_mask[byte_idx] = 0;  /* Reset bitmasks */
+
+               // do all the byte-lanes at the same time
+               wl_ctl.cn78xx.lanemask = 0x1ff;
+
+               lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64);
+
+               /*
+                * Read and write values back in order to update the
+                * status field. This ensures that we read the updated
+                * values after write-leveling has completed.
+                */
+               lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
+                      lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num)));
+
+               /* write-leveling */
+               oct3_ddr3_seq(priv, 1 << rankx, if_num, 6);
+
+               do {
+                       wl_rank.u64 = lmc_rd(priv,
+                                            CVMX_LMCX_WLEVEL_RANKX(rankx,
+                                                                   if_num));
+               } while (wl_rank.cn78xx.status != 3);
+
+               wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx,
+                                                                 if_num));
+
+               for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
+                       wl_mask[byte_idx] = lmc_ddr3_wl_dbg_read(priv,
+                                                                if_num,
+                                                                byte_idx);
+                       if (wl_mask[byte_idx] == 0)
+                               ++wl_mask_err;
+               }
+
+               // check validity only if no bitmask errors
+               if (wl_mask_err == 0) {
+                       if ((spd_dimm_type == 1 || spd_dimm_type == 2) &&
+                           dram_width != 16 && if_64b &&
+                           !disable_hwl_validity) {
+                               // bypass if [mini|SO]-[RU]DIMM or x16 or
+                               // 32-bit
+                               wl_val_err =
+                                   validate_hw_wl_settings(if_num,
+                                                           &wl_rank,
+                                                           spd_rdimm, ecc_ena);
+                               wl_val_err_rank += (wl_val_err != 0);
+                       }
+               } else {
+                       wl_mask_err_rank++;
+               }
+
+               // before we print, if we had bitmask or validity errors,
+               // do a retry...
+               if (wl_mask_err != 0 || wl_val_err != 0) {
+                       if (wloop_retries < WLOOP_RETRIES_DEFAULT) {
+                               wloop_retries++;
+                               wloop_retries_total++;
+                               // this printout is per-retry: only when VBL
+                               // is high enough (DEV?)
+                               // FIXME: do we want to show the bad bitmaps
+                               // or delays here also?
+                               debug("N%d.LMC%d.R%d: H/W Write-Leveling had %s errors - retrying...\n",
+                                     node, if_num, rankx,
+                                     (wl_mask_err) ? "Bitmask" : "Validity");
+                               // this takes us back to the top without
+                               // counting a sample
+                               return;
+                       }
+
+                       // retries exhausted, do not print at normal VBL
+                       debug("N%d.LMC%d.R%d: H/W Write-Leveling issues: %s errors\n",
+                             node, if_num, rankx,
+                             (wl_mask_err) ? "Bitmask" : "Validity");
+                       wloop_retries_exhausted++;
+               }
+               // no errors or exhausted retries, use this sample
+               wloop_retries = 0;      // reset for next sample
+
+               // when only 1 sample or forced, print the bitmasks then
+               // current HW WL
+               if (wl_loops == 1 || wl_print) {
+                       if (wl_print > 1)
+                               display_wl_bm(if_num, rankx, wl_mask);
+                       display_wl(if_num, wl_rank, rankx);
+               }
+
+               if (wl_roundup) {       /* Round up odd bitmask delays */
+                       for (byte_idx = 0; byte_idx < (8 + ecc_ena);
+                            ++byte_idx) {
+                               if (!(if_bytemask & (1 << byte_idx)))
+                                       return;
+                               upd_wl_rank(&wl_rank, byte_idx,
+                                           roundup_ddr3_wlevel_bitmask
+                                           (wl_mask[byte_idx]));
+                       }
+                       lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
+                              wl_rank.u64);
+                       display_wl(if_num, wl_rank, rankx);
+               }
+
+               // OK, we have a decent sample, no bitmask or validity errors
+               extra_bumps = 0;
+               extra_mask = 0;
+               for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
+                       int ix;
+
+                       if (!(if_bytemask & (1 << byte_idx)))
+                               return;
+
+                       // increment count of byte-lane value
+                       // only 4 values
+                       ix = (get_wl_rank(&wl_rank, byte_idx) >> 1) & 3;
+                       wl_bytes[byte_idx].bitcnt[ix]++;
+                       wl_bytes_extra[byte_idx].bitcnt[ix]++;
+                       // if perfect...
+                       if (__builtin_popcount(wl_mask[byte_idx]) == 4) {
+                               wl_bytes_extra[byte_idx].bitcnt[ix] +=
+                                   wl_pbm_pump;
+                               extra_bumps++;
+                               extra_mask |= 1 << byte_idx;
+                       }
+               }
+
+               if (extra_bumps) {
+                       if (wl_print > 1) {
+                               debug("N%d.LMC%d.R%d: HWL sample had %d bumps (0x%02x).\n",
+                                     node, if_num, rankx, extra_bumps,
+                                     extra_mask);
+                       }
+               }
+
+               // if we get here, we have taken a decent sample
+               wloop++;
+
+       }                       /* while (wloop < wl_loops) */
+
+       // if we did sample more than once, try to pick a majority vote
+       if (wl_loops > 1) {
+               // look for the majority in each byte-lane
+               for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
+                       int mx, mc, xc, cc;
+                       int ix, alts;
+                       int maj, xmaj, xmx, xmc, xxc, xcc;
+
+                       if (!(if_bytemask & (1 << byte_idx)))
+                               return;
+                       maj = find_wl_majority(&wl_bytes[byte_idx], &mx,
+                                              &mc, &xc, &cc);
+                       xmaj = find_wl_majority(&wl_bytes_extra[byte_idx],
+                                               &xmx, &xmc, &xxc, &xcc);
+                       if (maj != xmaj) {
+                               if (wl_print) {
+                                       debug("N%d.LMC%d.R%d: Byte %d: HWL maj %d(%d), USING xmaj %d(%d)\n",
+                                             node, if_num, rankx,
+                                             byte_idx, maj, xc, xmaj, xxc);
+                               }
+                               mx = xmx;
+                               mc = xmc;
+                               xc = xxc;
+                               cc = xcc;
+                       }
+
+                       // see if there was an alternate
+                       // take out the majority choice
+                       alts = (mc & ~(1 << mx));
+                       if (alts != 0) {
+                               for (ix = 0; ix < 4; ix++) {
+                                       // FIXME: could be done multiple times?
+                                       // bad if so
+                                       if (alts & (1 << ix)) {
+                                               // set the mask
+                                               hwl_alts[rankx].hwl_alt_mask |=
+                                                       (1 << byte_idx);
+                                               // record the value
+                                               hwl_alts[rankx].hwl_alt_delay[byte_idx] =
+                                                       ix << 1;
+                                               if (wl_print > 1) {
+                                                       debug("N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d (%d) alt %d (%d).\n",
+                                                             node,
+                                                             if_num,
+                                                             rankx,
+                                                             byte_idx,
+                                                             mx << 1,
+                                                             xc,
+                                                             ix << 1,
+                                                             wl_bytes
+                                                             [byte_idx].bitcnt
+                                                             [ix]);
+                                               }
+                                       }
+                               }
+                       }
+
+                       if (cc > 2) {   // unlikely, but...
+                               // assume: counts for 3 indices are all 1
+                               // possibilities are: 0/2/4, 2/4/6, 0/4/6, 0/2/6
+                               // and the desired?:   2  ,   4  ,     6, 0
+                               // we choose the middle, assuming one of the
+                               // outliers is bad
+                               // NOTE: this is an ugly hack at the moment;
+                               // there must be a better way
+                               switch (mc) {
+                               case 0x7:
+                                       mx = 1;
+                                       break;  // was 0/2/4, choose 2
+                               case 0xb:
+                                       mx = 0;
+                                       break;  // was 0/2/6, choose 0
+                               case 0xd:
+                                       mx = 3;
+                                       break;  // was 0/4/6, choose 6
+                               case 0xe:
+                                       mx = 2;
+                                       break;  // was 2/4/6, choose 4
+                               default:
+                               case 0xf:
+                                       mx = 1;
+                                       break;  // was 0/2/4/6, choose 2?
+                               }
+                               printf("N%d.LMC%d.R%d: HW WL MAJORITY: bad byte-lane %d (0x%x), using %d.\n",
+                                      node, if_num, rankx, byte_idx, mc,
+                                      mx << 1);
+                       }
+                       upd_wl_rank(&wl_rank, byte_idx, mx << 1);
+               }
+
+               lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
+                      wl_rank.u64);
+               display_wl_with_final(if_num, wl_rank, rankx);
+
+               // FIXME: does this help make the output a little easier
+               // to focus?
+               if (wl_print > 0)
+                       debug("-----------\n");
+
+       }                       /* if (wl_loops > 1) */
+
+       // maybe print an error summary for the rank
+       if (wl_mask_err_rank != 0 || wl_val_err_rank != 0) {
+               debug("N%d.LMC%d.R%d: H/W Write-Leveling errors - %d bitmask, %d validity, %d retries, %d exhausted\n",
+                     node, if_num, rankx, wl_mask_err_rank,
+                     wl_val_err_rank, wloop_retries_total,
+                     wloop_retries_exhausted);
+       }
+}
+
+static void lmc_write_leveling(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_config cfg;
+       int rankx;
+       char *s;
+
+       /*
+        * 4.8.9 LMC Write Leveling
+        *
+        * LMC supports an automatic write leveling like that described in the
+        * JEDEC DDR3 specifications separately per byte-lane.
+        *
+        * All of DDR PLL, LMC CK, LMC DRESET, and early LMC initializations
+        * must be completed prior to starting this LMC write-leveling sequence.
+        *
+        * There are many possible procedures that will write-level all the
+        * attached DDR3 DRAM parts. One possibility is for software to simply
+        * write the desired values into LMC(0)_WLEVEL_RANK(0..3). This section
+        * describes one possible sequence that uses LMC's auto write-leveling
+        * capabilities.
+        *
+        * 1. If the DQS/DQ delays on the board may be more than the ADD/CMD
+        *    delays, then ensure that LMC(0)_CONFIG[EARLY_DQX] is set at this
+        *    point.
+        *
+        * Do the remaining steps 2-7 separately for each rank i with attached
+        * DRAM.
+        *
+        * 2. Write LMC(0)_WLEVEL_RANKi = 0.
+        *
+        * 3. For x8 parts:
+        *
+        *    Without changing any other fields in LMC(0)_WLEVEL_CTL, write
+        *    LMC(0)_WLEVEL_CTL[LANEMASK] to select all byte lanes with attached
+        *    DRAM.
+        *
+        *    For x16 parts:
+        *
+        *    Without changing any other fields in LMC(0)_WLEVEL_CTL, write
+        *    LMC(0)_WLEVEL_CTL[LANEMASK] to select all even byte lanes with
+        *    attached DRAM.
+        *
+        * 4. Without changing any other fields in LMC(0)_CONFIG,
+        *
+        *    o write LMC(0)_SEQ_CTL[SEQ_SEL] to select write-leveling
+        *
+        *    o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
+        *
+        *    o write LMC(0)_SEQ_CTL[INIT_START] = 1
+        *
+        *    LMC will initiate write-leveling at this point. Assuming
+        *    LMC(0)_WLEVEL_CTL[SSET] = 0, LMC first enables write-leveling on
+        *    the selected DRAM rank via a DDR3 MR1 write, then sequences
+        *    through and accumulates write-leveling results for eight different delay
+        *    settings twice, starting at a delay of zero in this case since
+        *    LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] = 0, increasing by 1/8 CK each
+        *    setting, covering a total distance of one CK, then disables the
+        *    write-leveling via another DDR3 MR1 write.
+        *
+        *    After the sequence through 16 delay settings is complete:
+        *
+        *    o LMC sets LMC(0)_WLEVEL_RANKi[STATUS] = 3
+        *
+        *    o LMC sets LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] (for all ranks selected
+        *      by LMC(0)_WLEVEL_CTL[LANEMASK]) to indicate the first write
+        *      leveling result of 1 that followed result of 0 during the
+        *      sequence, except that the LMC always writes
+        *      LMC(0)_WLEVEL_RANKi[BYTE*<0>]=0.
+        *
+        *    o Software can read the eight write-leveling results from the
+        *      first pass through the delay settings by reading
+        *      LMC(0)_WLEVEL_DBG[BITMASK] (after writing
+        *      LMC(0)_WLEVEL_DBG[BYTE]). (LMC does not retain the write-leveling
+        *      results from the second pass through the eight delay
+        *      settings. They should often be identical to the
+        *      LMC(0)_WLEVEL_DBG[BITMASK] results, though.)
+        *
+        * 5. Wait until LMC(0)_WLEVEL_RANKi[STATUS] != 2.
+        *
+        *    LMC will have updated LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] for all byte
+        *    lanes selected by LMC(0)_WLEVEL_CTL[LANEMASK] at this point.
+        *    LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] will still be the value that
+        *    software wrote in substep 2 above, which is 0.
+        *
+        * 6. For x16 parts:
+        *
+        *    Without changing any other fields in LMC(0)_WLEVEL_CTL, write
+        *    LMC(0)_WLEVEL_CTL[LANEMASK] to select all odd byte lanes with
+        *    attached DRAM.
+        *
+        *    Repeat substeps 4 and 5 with this new LMC(0)_WLEVEL_CTL[LANEMASK]
+        *    setting. Skip to substep 7 if this has already been done.
+        *
+        *    For x8 parts:
+        *
+        *    Skip this substep. Go to substep 7.
+        *
+        * 7. Calculate LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings for all byte
+        *    lanes on all ranks with attached DRAM.
+        *
+        *    At this point, all byte lanes on rank i with attached DRAM should
+        *    have been write-leveled, and LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] has
+        *    the result for each byte lane.
+        *
+        *    But note that the DDR3 write-leveling sequence will only determine
+        *    the delay modulo the CK cycle time, and cannot determine how many
+        *    additional CK cycles of delay are present. Software must calculate
+        *    the number of CK cycles, or equivalently, the
+        *    LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings.
+        *
+        *    This BYTE*<4:3> calculation is system/board specific.
+        *
+        * Many techniques can be used to calculate write-leveling BYTE*<4:3>
+        * values, including:
+        *
+        *    o Known values for some byte lanes.
+        *
+        *    o Relative values for some byte lanes relative to others.
+        *
+        *    For example, suppose lane X is likely to require a larger
+        *    write-leveling delay than lane Y. A BYTEX<2:0> value that is much
+        *    smaller than the BYTEY<2:0> value may then indicate that the
+        *    required lane X delay wrapped into the next CK, so BYTEX<4:3>
+        *    should be set to BYTEY<4:3>+1.
+        *
+        *    When ECC DRAM is not present (i.e. when DRAM is not attached to
+        *    the DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the
+        *    DDR_DQS_<4>_* and DDR_DQ<35:32> chip signals), write
+        *    LMC(0)_WLEVEL_RANK*[BYTE8] = LMC(0)_WLEVEL_RANK*[BYTE0],
+        *    using the final calculated BYTE0 value.
+        *    Write LMC(0)_WLEVEL_RANK*[BYTE4] = LMC(0)_WLEVEL_RANK*[BYTE0],
+        *    using the final calculated BYTE0 value.
+        *
+        * 8. Initialize LMC(0)_WLEVEL_RANK* values for all unused ranks.
+        *
+        *    Let rank i be a rank with attached DRAM.
+        *
+        *    For all ranks j that do not have attached DRAM, set
+        *    LMC(0)_WLEVEL_RANKj = LMC(0)_WLEVEL_RANKi.
+        */
+
+       rankx = 0;
+       wl_roundup = 0;
+       disable_hwl_validity = 0;
+
+       // wl_pbm_pump: weight for write-leveling PBMs...
+       // 0 causes original behavior
+       // 1 allows a minority of 2 pbms to outscore a majority of 3 non-pbms
+       // 4 would allow a minority of 1 pbm to outscore a majority of 4
+       // non-pbms
+       wl_pbm_pump = 4;        // FIXME: is 4 too much?
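+       // Scoring example: with wl_pbm_pump = 4, a single sample with a
+       // perfect bitmask adds 1 + 4 = 5 counts for its delay value in the
+       // "extra" tally, so it outweighs up to four ordinary samples voting
+       // for a different value.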
+
+       if (wl_loops) {
+               debug("N%d.LMC%d: Performing Hardware Write-Leveling\n", node,
+                     if_num);
+       } else {
+               /* Force software write-leveling to run */
+               wl_mask_err = 1;
+               debug("N%d.LMC%d: Forcing software Write-Leveling\n", node,
+                     if_num);
+       }
+
+       default_wl_rtt_nom = (ddr_type == DDR3_DRAM) ?
+               rttnom_20ohm : ddr4_rttnom_40ohm;
+
+       cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+       ecc_ena = cfg.s.ecc_ena;
+       save_mode32b = cfg.cn78xx.mode32b;
+       cfg.cn78xx.mode32b = (!if_64b);
+       lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
+       debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
+
+       s = lookup_env(priv, "ddr_wlevel_roundup");
+       if (s)
+               wl_roundup = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_wlevel_printall");
+       if (s)
+               wl_print = strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_wlevel_pbm_bump");
+       if (s)
+               wl_pbm_pump = strtoul(s, NULL, 0);
+
+       // default to disable when RL sequential delay check is disabled
+       disable_hwl_validity = disable_sequential_delay_check;
+       s = lookup_env(priv, "ddr_disable_hwl_validity");
+       if (s)
+               disable_hwl_validity = !!strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_wl_rtt_nom");
+       if (s)
+               default_wl_rtt_nom = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_match_wl_rtt_nom");
+       if (s)
+               match_wl_rtt_nom = !!simple_strtoul(s, NULL, 0);
+
+       if (match_wl_rtt_nom)
+               mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
+
+       // For DDR3, we do not touch WLEVEL_CTL fields OR_DIS or BITMASK
+       // For DDR4, we touch WLEVEL_CTL fields OR_DIS or BITMASK here
+       if (ddr_type == DDR4_DRAM) {
+               int default_or_dis = 1;
+               int default_bitmask = 0xff;
+
+               // when x4, use only the lower nibble
+               if (dram_width == 4) {
+                       default_bitmask = 0x0f;
+                       if (wl_print) {
+                               debug("N%d.LMC%d: WLEVEL_CTL: default bitmask is 0x%02x for DDR4 x4\n",
+                                     node, if_num, default_bitmask);
+                       }
+               }
+
+               wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num));
+               wl_ctl.s.or_dis = default_or_dis;
+               wl_ctl.s.bitmask = default_bitmask;
+
+               // allow overrides
+               s = lookup_env(priv, "ddr_wlevel_ctl_or_dis");
+               if (s)
+                       wl_ctl.s.or_dis = !!strtoul(s, NULL, 0);
+
+               s = lookup_env(priv, "ddr_wlevel_ctl_bitmask");
+               if (s)
+                       wl_ctl.s.bitmask = simple_strtoul(s, NULL, 0);
+
+               // print only if not defaults
+               if (wl_ctl.s.or_dis != default_or_dis ||
+                   wl_ctl.s.bitmask != default_bitmask) {
+                       debug("N%d.LMC%d: WLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n",
+                             node, if_num, wl_ctl.s.or_dis, wl_ctl.s.bitmask);
+               }
+
+               // always write
+               lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64);
+       }
+
+       // Start the hardware write-leveling loop per rank
+       for (rankx = 0; rankx < dimm_count * 4; rankx++)
+               lmc_write_leveling_loop(priv, rankx);
+
+       cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+       cfg.cn78xx.mode32b = save_mode32b;
+       lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
+       debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
+
+       // At the end of HW Write Leveling, check on some DESKEW things...
+       if (!disable_deskew_training) {
+               struct deskew_counts dsk_counts;
+               int retry_count = 0;
+
+               debug("N%d.LMC%d: Check Deskew Settings before Read-Leveling.\n",
+                     node, if_num);
+
+               do {
+                       validate_deskew_training(priv, rank_mask, if_num,
+                                                &dsk_counts, 1);
+
+                       // only RAWCARD A or B will not benefit from
+                       // retraining if there's only saturation
+                       // or any rawcard if there is a nibble error
+                       if ((!spd_rawcard_aorb && dsk_counts.saturated > 0) ||
+                           (dsk_counts.nibrng_errs != 0 ||
+                            dsk_counts.nibunl_errs != 0)) {
+                               retry_count++;
+                               debug("N%d.LMC%d: Deskew Status indicates saturation or nibble errors - retry %d Training.\n",
+                                     node, if_num, retry_count);
+                               perform_deskew_training(priv, rank_mask, if_num,
+                                                       spd_rawcard_aorb);
+                       } else {
+                               break;
+                       }
+               } while (retry_count < 5);
+       }
+}
+
+static void lmc_workaround(struct ddr_priv *priv)
+{
+       /* Workaround Trcd overflow by using Additive latency. */
+       if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
+               union cvmx_lmcx_modereg_params0 mp0;
+               union cvmx_lmcx_timing_params1 tp1;
+               union cvmx_lmcx_control ctrl;
+               int rankx;
+
+               tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));
+               mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
+               ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
+
+               if (tp1.cn78xx.trcd == 0) {
+                       debug("Workaround Trcd overflow by using Additive latency.\n");
+                       /* Hard code this to 12 and enable additive latency */
+                       tp1.cn78xx.trcd = 12;
+                       mp0.s.al = 2;   /* CL-2 */
+                       ctrl.s.pocas = 1;
+
+                       debug("MODEREG_PARAMS0                               : 0x%016llx\n",
+                             mp0.u64);
+                       lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
+                              mp0.u64);
+                       debug("TIMING_PARAMS1                                : 0x%016llx\n",
+                             tp1.u64);
+                       lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);
+
+                       debug("LMC_CONTROL                                   : 0x%016llx\n",
+                             ctrl.u64);
+                       lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
+
+                       for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+                               if (!(rank_mask & (1 << rankx)))
+                                       continue;
+
+                               /* MR1 */
+                               ddr4_mrw(priv, if_num, rankx, -1, 1, 0);
+                       }
+               }
+       }
+
+       // this is here just for output, to allow check of the Deskew
+       // settings one last time...
+       if (!disable_deskew_training) {
+               struct deskew_counts dsk_counts;
+
+               debug("N%d.LMC%d: Check Deskew Settings before software Write-Leveling.\n",
+                     node, if_num);
+               validate_deskew_training(priv, rank_mask, if_num, &dsk_counts,
+                                        3);
+       }
+
+       /*
+        * Workaround Errata 26304 (T88@2.0, O75@1.x, O78@2.x)
+        *
+        * When the CSRs LMCX_DLL_CTL3[WR_DESKEW_ENA] = 1 AND
+        * LMCX_PHY_CTL2[DQS[0..8]_DSK_ADJ] > 4, set
+        * LMCX_EXT_CONFIG[DRIVE_ENA_BPRCH] = 1.
+        */
+       if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
+           octeon_is_cpuid(OCTEON_CNF75XX_PASS1_X)) {
+               union cvmx_lmcx_dll_ctl3 dll_ctl3;
+               union cvmx_lmcx_phy_ctl2 phy_ctl2;
+               union cvmx_lmcx_ext_config ext_cfg;
+               int increased_dsk_adj = 0;
+               int byte;
+
+               phy_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL2(if_num));
+               ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
+               dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
+
+               for (byte = 0; byte < 8; ++byte) {
+                       if (!(if_bytemask & (1 << byte)))
+                               continue;
+                       increased_dsk_adj |=
+                           (((phy_ctl2.u64 >> (byte * 3)) & 0x7) > 4);
+               }
+
+               if (dll_ctl3.s.wr_deskew_ena == 1 && increased_dsk_adj) {
+                       ext_cfg.s.drive_ena_bprch = 1;
+                       lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64);
+                       debug("LMC%d: Forcing DRIVE_ENA_BPRCH for Workaround Errata 26304.\n",
+                             if_num);
+               }
+       }
+}
+
+// Software Write-Leveling block
+
+#define VREF_RANGE1_LIMIT 0x33 // range1 is valid for 0x00 - 0x32
+#define VREF_RANGE2_LIMIT 0x18 // range2 is valid for 0x00 - 0x17
+// full window is valid for 0x00 to 0x4A
+// let 0x00 - 0x17 be range2, 0x18 - 0x4a be range 1
+#define VREF_LIMIT        (VREF_RANGE1_LIMIT + VREF_RANGE2_LIMIT)
+#define VREF_FINAL        (VREF_LIMIT - 1)
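+// Mapping example: in ddr4_vref_loop() below, a combined index under
+// VREF_RANGE2_LIMIT (e.g. 0x10) is programmed as-is with vrange = 1 (range 2
+// in the split above), while a larger index such as 0x20 is programmed as
+// vrange = 0 (range 1) with value 0x20 - 0x18 = 0x08.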
+
+enum sw_wl_status {
+       WL_ESTIMATED = 0, /* HW/SW wleveling failed. Result estimated */
+       WL_HARDWARE = 1,        /* H/W wleveling succeeded */
+       WL_SOFTWARE = 2, /* S/W wleveling passed 2 contiguous settings */
+       WL_SOFTWARE1 = 3, /* S/W wleveling passed 1 marginal setting */
+};
+
+static u64 rank_addr __section(".data");
+static int vref_val __section(".data");
+static int final_vref_val __section(".data");
+static int final_vref_range __section(".data");
+static int start_vref_val __section(".data");
+static int computed_final_vref_val __section(".data");
+static char best_vref_val_count __section(".data");
+static char vref_val_count __section(".data");
+static char best_vref_val_start __section(".data");
+static char vref_val_start __section(".data");
+static int bytes_failed __section(".data");
+static enum sw_wl_status byte_test_status[9] __section(".data");
+static enum sw_wl_status sw_wl_rank_status __section(".data");
+static int sw_wl_failed __section(".data");
+static int sw_wl_hw __section(".data");
+static int measured_vref_flag __section(".data");
+
+static void ddr4_vref_loop(struct ddr_priv *priv, int rankx)
+{
+       char *s;
+
+       if (vref_val < VREF_FINAL) {
+               int vrange, vvalue;
+
+               if (vref_val < VREF_RANGE2_LIMIT) {
+                       vrange = 1;
+                       vvalue = vref_val;
+               } else {
+                       vrange = 0;
+                       vvalue = vref_val - VREF_RANGE2_LIMIT;
+               }
+
+               set_vref(priv, if_num, rankx, vrange, vvalue);
+       } else {                /* if (vref_val < VREF_FINAL) */
+               /* Print the final vref value first. */
+
+               /* Always print the computed first if its valid */
+               if (computed_final_vref_val >= 0) {
+                       debug("N%d.LMC%d.R%d: vref Computed Summary                 :              %2d (0x%02x)\n",
+                             node, if_num, rankx,
+                             computed_final_vref_val, computed_final_vref_val);
+               }
+
+               if (!measured_vref_flag) {      // setup to use the computed
+                       best_vref_val_count = 1;
+                       final_vref_val = computed_final_vref_val;
+               } else {        // setup to use the measured
+                       if (best_vref_val_count > 0) {
+                               best_vref_val_count =
+                                   max(best_vref_val_count, (char)2);
+                               final_vref_val = best_vref_val_start +
+                                       divide_nint(best_vref_val_count - 1, 2);
+
+                               if (final_vref_val < VREF_RANGE2_LIMIT) {
+                                       final_vref_range = 1;
+                               } else {
+                                       final_vref_range = 0;
+                                       final_vref_val -= VREF_RANGE2_LIMIT;
+                               }
+
+                               int vvlo = best_vref_val_start;
+                               int vrlo;
+                               int vvhi = best_vref_val_start +
+                                       best_vref_val_count - 1;
+                               int vrhi;
+
+                               if (vvlo < VREF_RANGE2_LIMIT) {
+                                       vrlo = 2;
+                               } else {
+                                       vrlo = 1;
+                                       vvlo -= VREF_RANGE2_LIMIT;
+                               }
+
+                               if (vvhi < VREF_RANGE2_LIMIT) {
+                                       vrhi = 2;
+                               } else {
+                                       vrhi = 1;
+                                       vvhi -= VREF_RANGE2_LIMIT;
+                               }
+                               debug("N%d.LMC%d.R%d: vref Training Summary                 :  0x%02x/%1d <----- 0x%02x/%1d -----> 0x%02x/%1d, range: %2d\n",
+                                     node, if_num, rankx, vvlo, vrlo,
+                                     final_vref_val,
+                                     final_vref_range + 1, vvhi, vrhi,
+                                     best_vref_val_count - 1);
+
+                       } else {
+                               /*
+                                * If nothing passed use the default vref
+                                * value for this rank
+                                */
+                               union cvmx_lmcx_modereg_params2 mp2;
+
+                               mp2.u64 =
+                                       lmc_rd(priv,
+                                              CVMX_LMCX_MODEREG_PARAMS2(if_num));
+                               final_vref_val = (mp2.u64 >>
+                                                 (rankx * 10 + 3)) & 0x3f;
+                               final_vref_range = (mp2.u64 >>
+                                                   (rankx * 10 + 9)) & 0x01;
+
+                               debug("N%d.LMC%d.R%d: vref Using Default                    :    %2d <----- %2d (0x%02x) -----> %2d, range%1d\n",
+                                     node, if_num, rankx, final_vref_val,
+                                     final_vref_val, final_vref_val,
+                                     final_vref_val, final_vref_range + 1);
+                       }
+               }
+
+               // allow override
+               s = lookup_env(priv, "ddr%d_vref_val_%1d%1d",
+                              if_num, !!(rankx & 2), !!(rankx & 1));
+               if (s)
+                       final_vref_val = strtoul(s, NULL, 0);
+
+               set_vref(priv, if_num, rankx, final_vref_range, final_vref_val);
+       }
+}
+
+#define WL_MIN_NO_ERRORS_COUNT 3       // FIXME? three passes without errors
+
+static int errors __section(".data");
+static int byte_delay[9] __section(".data");
+static u64 bytemask __section(".data");
+static int bytes_todo __section(".data");
+static int no_errors_count __section(".data");
+static u64 bad_bits[2] __section(".data");
+static u64 sum_dram_dclk __section(".data");
+static u64 sum_dram_ops __section(".data");
+static u64 start_dram_dclk __section(".data");
+static u64 stop_dram_dclk __section(".data");
+static u64 start_dram_ops __section(".data");
+static u64 stop_dram_ops __section(".data");
+
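+/*
+ * One pass of the software write-leveling search for a rank: write the
+ * current WLEVEL_RANK delays, run the DRAM test (HW-assisted patterns or
+ * test_dram_byte64), and for each byte lane that still reports errors bump
+ * its delay by 8 to step through the high-order delay bits. A lane that
+ * exhausts its delay range either falls back to a recorded hardware
+ * write-leveling alternate (computed-VREF case) or is marked WL_ESTIMATED
+ * and dropped from further testing.
+ */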
+static void lmc_sw_write_leveling_loop(struct ddr_priv *priv, int rankx)
+{
+       int delay;
+       int b;
+
+       // write the current set of WL delays
+       lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), wl_rank.u64);
+       wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num));
+
+       // do the test
+       if (sw_wl_hw) {
+               errors = run_best_hw_patterns(priv, if_num, rank_addr,
+                                             DBTRAIN_TEST, bad_bits);
+               errors &= bytes_todo;   // keep only the ones we are still doing
+       } else {
+               start_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num));
+               start_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num));
+               errors = test_dram_byte64(priv, if_num, rank_addr, bytemask,
+                                         bad_bits);
+
+               stop_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num));
+               stop_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num));
+               sum_dram_dclk += stop_dram_dclk - start_dram_dclk;
+               sum_dram_ops += stop_dram_ops - start_dram_ops;
+       }
+
+       debug("WL pass1: test_dram_byte returned 0x%x\n", errors);
+
+       // remember, errors will not be returned for byte-lanes that have
+       // maxed out...
+       if (errors == 0) {
+               no_errors_count++;      // bump
+               // bypass check/update completely
+               if (no_errors_count > 1)
+                       return; // to end of do-while
+       } else {
+               no_errors_count = 0;    // reset
+       }
+
+       // check errors by byte
+       for (b = 0; b < 9; ++b) {
+               if (!(bytes_todo & (1 << b)))
+                       continue;
+
+               delay = byte_delay[b];
+               // yes, an error in this byte lane
+               if (errors & (1 << b)) {
+                       debug("        byte %d delay %2d Errors\n", b, delay);
+                       // since this byte had an error, we move to the next
+                       // delay value, unless done with it
+                       delay += 8;     // incr by 8 to do delay high-order bits
+                       if (delay < 32) {
+                               upd_wl_rank(&wl_rank, b, delay);
+                               debug("        byte %d delay %2d New\n",
+                                     b, delay);
+                               byte_delay[b] = delay;
+                       } else {
+                               // reached max delay, maybe really done with
+                               // this byte
+                               // consider an alt only for computed VREF and
+                               if (!measured_vref_flag &&
+                                   (hwl_alts[rankx].hwl_alt_mask & (1 << b))) {
+                                       // if an alt exists...
+                                       // keep bits <2:1> of the orig delay
+                                       int bad_delay = delay & 0x6;
+
+                                       // yes, use it
+                                       delay = hwl_alts[rankx].hwl_alt_delay[b];
+                                       // clear that flag
+                                       hwl_alts[rankx].hwl_alt_mask &=
+                                               ~(1 << b);
+                                       upd_wl_rank(&wl_rank, b, delay);
+                                       byte_delay[b] = delay;
+                                       debug("        byte %d delay %2d ALTERNATE\n",
+                                             b, delay);
+                                       debug("N%d.LMC%d.R%d: SWL: Byte %d: %d FAIL, trying ALTERNATE %d\n",
+                                             node, if_num,
+                                             rankx, b, bad_delay, delay);
+
+                               } else {
+                                       unsigned int bits_bad;
+
+                                       if (b < 8) {
+                                               // test no longer, remove from
+                                               // byte mask
+                                               bytemask &=
+                                                       ~(0xffULL << (8 * b));
+                                               bits_bad = (unsigned int)
+                                                       ((bad_bits[0] >>
+                                                         (8 * b)) & 0xffUL);
+                                       } else {
+                                               bits_bad = (unsigned int)
+                                                   (bad_bits[1] & 0xffUL);
+                                       }
+
+                                       // remove from bytes to do
+                                       bytes_todo &= ~(1 << b);
+                                       // make sure this is set for this case
+                                       byte_test_status[b] = WL_ESTIMATED;
+                                       debug("        byte %d delay %2d Exhausted\n",
+                                             b, delay);
+                                       if (!measured_vref_flag) {
+                                               // this is too noisy when doing
+                                               // measured VREF
+                                               debug("N%d.LMC%d.R%d: SWL: Byte %d (0x%02x): delay %d EXHAUSTED\n",
+                                                     node, if_num, rankx,
+                                                     b, bits_bad, delay);
+                                       }
+                               }
+                       }
+               } else {
+                       // no error, stay with current delay, but keep testing
+                       // it...
+                       debug("        byte %d delay %2d Passed\n", b, delay);
+                       byte_test_status[b] = WL_HARDWARE;      // change status
+               }
+       }                       /* for (b = 0; b < 9; ++b) */
+}
+
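+/*
+ * Estimate the ECC byte (byte8) write-leveling delay when it cannot be
+ * tested directly: pick the multiple-of-8 adder to the hardware byte8
+ * result that lands closest to the average of byte3 and byte4, round down
+ * to an even setting, and sanity-check the result against the byte3/byte4
+ * range (the RDIMM and UDIMM rules differ).
+ */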
+static void sw_write_lvl_use_ecc(struct ddr_priv *priv, int rankx)
+{
+       int save_byte8 = wl_rank.s.byte8;
+
+       byte_test_status[8] = WL_HARDWARE;      /* H/W delay value */
+
+       if (save_byte8 != wl_rank.s.byte3 &&
+           save_byte8 != wl_rank.s.byte4) {
+               int test_byte8 = save_byte8;
+               int test_byte8_error;
+       udelay(1); /* Give it a little time to take effect */
+               int adder;
+               int avg_bytes = divide_nint(wl_rank.s.byte3 + wl_rank.s.byte4,
+                                           2);
+
+               for (adder = 0; adder <= 32; adder += 8) {
+                       test_byte8_error = abs((adder + save_byte8) -
+                                              avg_bytes);
+                       if (test_byte8_error < byte8_error) {
+                               byte8_error = test_byte8_error;
+                               test_byte8 = save_byte8 + adder;
+                       }
+               }
+
+               // only do the check if we are not using measured VREF
+               if (!measured_vref_flag) {
+                       /* Use only even settings, rounding down... */
+                       test_byte8 &= ~1;
+
+                       // do validity check on the calculated ECC delay value
+                       // this depends on the DIMM type
+                       if (spd_rdimm) {        // RDIMM
+                               // but not mini-RDIMM
+                               if (spd_dimm_type != 5) {
+                                       // it can be > byte4, but should never
+                                       // be > byte3
+                                       if (test_byte8 > wl_rank.s.byte3) {
+                                               /* say it is still estimated */
+                                               byte_test_status[8] =
+                                                       WL_ESTIMATED;
+                                       }
+                               }
+                       } else {        // UDIMM
+                               if (test_byte8 < wl_rank.s.byte3 ||
+                                   test_byte8 > wl_rank.s.byte4) {
+                                       // should never be outside the
+                                       // byte 3-4 range
+                                       /* say it is still estimated */
+                                       byte_test_status[8] = WL_ESTIMATED;
+                               }
+                       }
+                       /*
+                        * Report whenever the calculation appears bad.
+                        * This happens if some of the original values were off,
+                        * or unexpected geometry from DIMM type, or custom
+                        * circuitry (NIC225E, I am looking at you!).
+                        * We will trust the calculated value, and depend on
+                        * later testing to catch any instances when that
+                        * value is truly bad.
+                        */
+                       // ESTIMATED means there may be an issue
+                       if (byte_test_status[8] == WL_ESTIMATED) {
+                               debug("N%d.LMC%d.R%d: SWL: (%cDIMM): calculated ECC delay unexpected (%d/%d/%d)\n",
+                                     node, if_num, rankx,
+                                     (spd_rdimm ? 'R' : 'U'), wl_rank.s.byte4,
+                                     test_byte8, wl_rank.s.byte3);
+                               byte_test_status[8] = WL_HARDWARE;
+                       }
+               }
+               /* Use only even settings */
+               wl_rank.s.byte8 = test_byte8 & ~1;
+       }
+
+       if (wl_rank.s.byte8 != save_byte8) {
+               /* Change the status if s/w adjusted the delay */
+               byte_test_status[8] = WL_SOFTWARE;      /* Estimated delay */
+       }
+}
+
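+/*
+ * Alternative (parallel) search over the low-order write-leveling delay
+ * bits: all untested byte lanes start at delay 0 and are stepped by 2 in
+ * lockstep; a lane is done once it passes (1 + wl_offset) consecutive
+ * tests. Lanes that never pass fall back to an estimate derived from the
+ * read-leveling settings.
+ */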
+static __maybe_unused void parallel_wl_block_delay(struct ddr_priv *priv,
+                                                  int rankx)
+{
+       int errors;
+       int byte_delay[8];
+       int byte_passed[8];
+       u64 bytemask;
+       u64 bitmask;
+       int wl_offset;
+       int bytes_todo;
+       int sw_wl_offset = 1;
+       int delay;
+       int b;
+
+       for (b = 0; b < 8; ++b)
+               byte_passed[b] = 0;
+
+       bytes_todo = if_bytemask;
+
+       for (wl_offset = sw_wl_offset; wl_offset >= 0; --wl_offset) {
+               debug("Starting wl_offset for-loop: %d\n", wl_offset);
+
+               bytemask = 0;
+
+               for (b = 0; b < 8; ++b) {
+                       byte_delay[b] = 0;
+                       // this does not contain fully passed bytes
+                       if (!(bytes_todo & (1 << b)))
+                               continue;
+
+                       // reset across passes if not fully passed
+                       byte_passed[b] = 0;
+                       upd_wl_rank(&wl_rank, b, 0);    // all delays start at 0
+                       bitmask = ((!if_64b) && (b == 4)) ? 0x0f : 0xff;
+                       // set the bytes bits in the bytemask
+                       bytemask |= bitmask << (8 * b);
+               }               /* for (b = 0; b < 8; ++b) */
+
+               // start a pass if there is any byte lane to test
+               while (bytemask != 0) {
+                       debug("Starting bytemask while-loop: 0x%llx\n",
+                             bytemask);
+
+                       // write this set of WL delays
+                       lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
+                              wl_rank.u64);
+                       wl_rank.u64 = lmc_rd(priv,
+                                            CVMX_LMCX_WLEVEL_RANKX(rankx,
+                                                                   if_num));
+
+                       // do the test
+                       if (sw_wl_hw) {
+                               errors = run_best_hw_patterns(priv, if_num,
+                                                             rank_addr,
+                                                             DBTRAIN_TEST,
+                                                             NULL) & 0xff;
+                       } else {
+                               errors = test_dram_byte64(priv, if_num,
+                                                         rank_addr, bytemask,
+                                                         NULL);
+                       }
+
+                       debug("test_dram_byte returned 0x%x\n", errors);
+
+                       // check errors by byte
+                       for (b = 0; b < 8; ++b) {
+                               if (!(bytes_todo & (1 << b)))
+                                       continue;
+
+                               delay = byte_delay[b];
+                               if (errors & (1 << b)) {        // yes, an error
+                                       debug("        byte %d delay %2d Errors\n",
+                                             b, delay);
+                                       byte_passed[b] = 0;
+                               } else {        // no error
+                                       byte_passed[b] += 1;
+                                       // Look for consecutive working settings
+                                       if (byte_passed[b] == (1 + wl_offset)) {
+                                               debug("        byte %d delay %2d FULLY Passed\n",
+                                                     b, delay);
+                                               if (wl_offset == 1) {
+                                                       byte_test_status[b] =
+                                                               WL_SOFTWARE;
+                                               } else if (wl_offset == 0) {
+                                                       byte_test_status[b] =
+                                                               WL_SOFTWARE1;
+                                               }
+
+                                               // test no longer, remove
+                                               // from byte mask this pass
+                                               bytemask &= ~(0xffULL <<
+                                                             (8 * b));
+                                               // remove completely from
+                                               // concern
+                                               bytes_todo &= ~(1 << b);
+                                               // on to the next byte, bypass
+                                               // delay updating!!
+                                               continue;
+                                       } else {
+                                               debug("        byte %d delay %2d Passed\n",
+                                                     b, delay);
+                                       }
+                               }
+
+                               // error or no, here we move to the next delay
+                               // value for this byte, unless done all delays
+                               // only a byte that has "fully passed" will
+                               // bypass around this,
+                               delay += 2;
+                               if (delay < 32) {
+                                       upd_wl_rank(&wl_rank, b, delay);
+                                       debug("        byte %d delay %2d New\n",
+                                             b, delay);
+                                       byte_delay[b] = delay;
+                               } else {
+                                       // reached max delay, done with this
+                                       // byte
+                                       debug("        byte %d delay %2d Exhausted\n",
+                                             b, delay);
+                                       // test no longer, remove from byte
+                                       // mask this pass
+                                       bytemask &= ~(0xffULL << (8 * b));
+                               }
+                       }       /* for (b = 0; b < 8; ++b) */
+                       debug("End of for-loop: bytemask 0x%llx\n", bytemask);
+               }               /* while (bytemask != 0) */
+       }
+
+       for (b = 0; b < 8; ++b) {
+               // any bytes left in bytes_todo did not pass
+               if (bytes_todo & (1 << b)) {
+                       union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank;
+
+                       /*
+                        * Last resort. Use Rlevel settings to estimate
+                        * Wlevel if software write-leveling fails
+                        */
+                       debug("Using RLEVEL as WLEVEL estimate for byte %d\n",
+                             b);
+                       lmc_rlevel_rank.u64 =
+                               lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
+                                                                   if_num));
+                       rlevel_to_wlevel(&lmc_rlevel_rank, &wl_rank, b);
+               }
+       }                       /* for (b = 0; b < 8; ++b) */
+}
+
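+/*
+ * Top-level software write-leveling: for every active rank, optionally
+ * sweep the DDR4 vref range (or use the computed value), run the delay
+ * search above, estimate the ECC byte where needed, and validate the final
+ * settings with one more DRAM test. A rank that cannot be leveled triggers
+ * either a chip reset or an -EAGAIN return so LMC init can be retried.
+ * Environment overrides are applied afterwards and unused rank entries are
+ * filled with values from a programmed rank.
+ */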
+static int lmc_sw_write_leveling(struct ddr_priv *priv)
+{
+       /* Try to determine/optimize write-level delays experimentally. */
+       union cvmx_lmcx_wlevel_rankx wl_rank_hw_res;
+       union cvmx_lmcx_config cfg;
+       int rankx;
+       int byte;
+       char *s;
+       int i;
+
+       int active_rank;
+       int sw_wl_enable = 1;   /* FIX... Should be customizable. */
+       int interfaces;
+
+       static const char * const wl_status_strings[] = {
+               "(e)",
+               "   ",
+               "   ",
+               "(1)"
+       };
+
+       // FIXME: make HW-assist the default now?
+       int sw_wl_hw_default = SW_WLEVEL_HW_DEFAULT;
+       int dram_connection = c_cfg->dram_connection;
+
+       s = lookup_env(priv, "ddr_sw_wlevel_hw");
+       if (s)
+               sw_wl_hw_default = !!strtoul(s, NULL, 0);
+       if (!if_64b)            // must use SW algo if 32-bit mode
+               sw_wl_hw_default = 0;
+
+       // can never use hw-assist
+       if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
+               sw_wl_hw_default = 0;
+
+       s = lookup_env(priv, "ddr_software_wlevel");
+       if (s)
+               sw_wl_enable = strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr%d_dram_connection", if_num);
+       if (s)
+               dram_connection = !!strtoul(s, NULL, 0);
+
+       cvmx_rng_enable();
+
+       /*
+        * Get the measured_vref setting from the config, check for an
+        * override...
+        */
+       /* NOTE: measured_vref=1 (ON) means force use of MEASURED vref... */
+       // NOTE: measured VREF can only be done for DDR4
+       if (ddr_type == DDR4_DRAM) {
+               measured_vref_flag = c_cfg->measured_vref;
+               s = lookup_env(priv, "ddr_measured_vref");
+               if (s)
+                       measured_vref_flag = !!strtoul(s, NULL, 0);
+       } else {
+               measured_vref_flag = 0; // OFF for DDR3
+       }
+
+       /*
+        * Ensure ECC is disabled for the DRAM tests when using the SW
+        * algorithm, otherwise leave it untouched
+        */
+       if (!sw_wl_hw_default) {
+               cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+               cfg.cn78xx.ecc_ena = 0;
+               lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
+       }
+
+       /*
+        * We need to track absolute rank number, as well as how many
+        * active ranks we have.  Two single rank DIMMs show up as
+        * ranks 0 and 2, but only 2 ranks are active.
+        */
+       active_rank = 0;
+
+       interfaces = __builtin_popcount(if_mask);
+
+       for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+               final_vref_range = 0;
+               start_vref_val = 0;
+               computed_final_vref_val = -1;
+               sw_wl_rank_status = WL_HARDWARE;
+               sw_wl_failed = 0;
+               sw_wl_hw = sw_wl_hw_default;
+
+               if (!sw_wl_enable)
+                       break;
+
+               if (!(rank_mask & (1 << rankx)))
+                       continue;
+
+               debug("N%d.LMC%d.R%d: Performing Software Write-Leveling %s\n",
+                     node, if_num, rankx,
+                     (sw_wl_hw) ? "with H/W assist" :
+                     "with S/W algorithm");
+
+               if (ddr_type == DDR4_DRAM && num_ranks != 4) {
+                       // always compute when we can...
+                       computed_final_vref_val =
+                           compute_vref_val(priv, if_num, rankx, dimm_count,
+                                            num_ranks, imp_val,
+                                            is_stacked_die, dram_connection);
+
+                       // but only use it if allowed
+                       if (!measured_vref_flag) {
+                               // skip all the measured vref processing,
+                               // just the final setting
+                               start_vref_val = VREF_FINAL;
+                       }
+               }
+
+               /* Save off the h/w wl results */
+               wl_rank_hw_res.u64 = lmc_rd(priv,
+                                           CVMX_LMCX_WLEVEL_RANKX(rankx,
+                                                                  if_num));
+
+               vref_val_count = 0;
+               vref_val_start = 0;
+               best_vref_val_count = 0;
+               best_vref_val_start = 0;
+
+               /* Loop one extra time using the Final vref value. */
+               for (vref_val = start_vref_val; vref_val < VREF_LIMIT;
+                    ++vref_val) {
+                       if (ddr_type == DDR4_DRAM)
+                               ddr4_vref_loop(priv, rankx);
+
+                       /* Restore the saved value */
+                       wl_rank.u64 = wl_rank_hw_res.u64;
+
+                       for (byte = 0; byte < 9; ++byte)
+                               byte_test_status[byte] = WL_ESTIMATED;
+
+                       if (wl_mask_err == 0) {
+                               /*
+                                * Determine address of DRAM to test for
+                                * pass 1 of software write leveling.
+                                */
+                               rank_addr = active_rank *
+                                       (1ull << (pbank_lsb - bunk_enable +
+                                                 (interfaces / 2)));
+
+                               /*
+                                * Adjust address for boot bus hole in memory
+                                * map.
+                                */
+                               if (rank_addr > 0x10000000)
+                                       rank_addr += 0x10000000;
+
+                               debug("N%d.LMC%d.R%d: Active Rank %d Address: 0x%llx\n",
+                                     node, if_num, rankx, active_rank,
+                                     rank_addr);
+
+                               // start parallel write-leveling block for
+                               // delay high-order bits
+                               errors = 0;
+                               no_errors_count = 0;
+                               sum_dram_dclk = 0;
+                               sum_dram_ops = 0;
+
+                               if (if_64b) {
+                                       bytes_todo = (sw_wl_hw) ?
+                                               if_bytemask : 0xFF;
+                                       bytemask = ~0ULL;
+                               } else {
+                                       // 32-bit, must be using SW algo,
+                                       // only data bytes
+                                       bytes_todo = 0x0f;
+                                       bytemask = 0x00000000ffffffffULL;
+                               }
+
+                               for (byte = 0; byte < 9; ++byte) {
+                                       if (!(bytes_todo & (1 << byte))) {
+                                               byte_delay[byte] = 0;
+                                       } else {
+                                               byte_delay[byte] =
+                                                   get_wl_rank(&wl_rank, byte);
+                                       }
+                               }       /* for (byte = 0; byte < 9; ++byte) */
+
+                               do {
+                                       lmc_sw_write_leveling_loop(priv, rankx);
+                               } while (no_errors_count <
+                                        WL_MIN_NO_ERRORS_COUNT);
+
+                               if (!sw_wl_hw) {
+                                       u64 percent_x10;
+
+                                       if (sum_dram_dclk == 0)
+                                               sum_dram_dclk = 1;
+                                       percent_x10 = sum_dram_ops * 1000 /
+                                               sum_dram_dclk;
+                                       debug("N%d.LMC%d.R%d: ops %llu, cycles %llu, used %llu.%llu%%\n",
+                                             node, if_num, rankx, sum_dram_ops,
+                                             sum_dram_dclk, percent_x10 / 10,
+                                             percent_x10 % 10);
+                               }
+                               if (errors) {
+                                       debug("End WLEV_64 while loop: vref_val %d(0x%x), errors 0x%02x\n",
+                                             vref_val, vref_val, errors);
+                               }
+                               // end parallel write-leveling block for
+                               // delay high-order bits
+
+                               // if we used HW-assist, the ECC byte was
+                               // already done when appropriate
+                               if (sw_wl_hw) {
+                                       if (wl_print) {
+                                               debug("N%d.LMC%d.R%d: HW-assisted SWL - ECC estimate not needed.\n",
+                                                     node, if_num, rankx);
+                                       }
+                                       goto no_ecc_estimate;
+                               }
+
+                               if ((if_bytemask & 0xff) == 0xff) {
+                                       if (use_ecc) {
+                                               sw_write_lvl_use_ecc(priv,
+                                                                    rankx);
+                                       } else {
+                                               /* H/W delay value */
+                                               byte_test_status[8] =
+                                                       WL_HARDWARE;
+                                               /* ECC is not used */
+                                               wl_rank.s.byte8 =
+                                                       wl_rank.s.byte0;
+                                       }
+                               } else {
+                                       if (use_ecc) {
+                                               /* Estimate the ECC byte dly */
+                                               // add hi-order to b4
+                                               wl_rank.s.byte4 |=
+                                                       (wl_rank.s.byte3 &
+                                                        0x38);
+                                               if ((wl_rank.s.byte4 & 0x06) <
+                                                   (wl_rank.s.byte3 & 0x06)) {
+                                                       // must be next clock
+                                                       wl_rank.s.byte4 += 8;
+                                               }
+                                       } else {
+                                               /* ECC is not used */
+                                               wl_rank.s.byte4 =
+                                                       wl_rank.s.byte0;
+                                       }
+
+                                       /*
+                                        * Change the status if s/w adjusted
+                                        * the delay
+                                        */
+                                       /* Estimated delay */
+                                       byte_test_status[4] = WL_SOFTWARE;
+                               }       /* if ((if_bytemask & 0xff) == 0xff) */
+                       }       /* if (wl_mask_err == 0) */
+
+no_ecc_estimate:
+
+                       bytes_failed = 0;
+                       for (byte = 0; byte < 9; ++byte) {
+                               /* Don't accumulate errors for untested bytes */
+                               if (!(if_bytemask & (1 << byte)))
+                                       continue;
+                               bytes_failed +=
+                                   (byte_test_status[byte] == WL_ESTIMATED);
+                       }
+
+                       /* vref training loop is only used for DDR4  */
+                       if (ddr_type != DDR4_DRAM)
+                               break;
+
+                       if (bytes_failed == 0) {
+                               if (vref_val_count == 0)
+                                       vref_val_start = vref_val;
+
+                               ++vref_val_count;
+                               if (vref_val_count > best_vref_val_count) {
+                                       best_vref_val_count = vref_val_count;
+                                       best_vref_val_start = vref_val_start;
+                                       debug("N%d.LMC%d.R%d: vref Training                    (%2d) :    0x%02x <----- ???? -----> 0x%02x\n",
+                                             node, if_num, rankx, vref_val,
+                                             best_vref_val_start,
+                                             best_vref_val_start +
+                                             best_vref_val_count - 1);
+                               }
+                       } else {
+                               vref_val_count = 0;
+                               debug("N%d.LMC%d.R%d: vref Training                    (%2d) :    failed\n",
+                                     node, if_num, rankx, vref_val);
+                       }
+               }
+
+               /*
+                * Determine address of DRAM to test for software write
+                * leveling.
+                */
+               rank_addr = active_rank * (1ull << (pbank_lsb - bunk_enable +
+                                                   (interfaces / 2)));
+               /* Adjust address for boot bus hole in memory map. */
+               if (rank_addr > 0x10000000)
+                       rank_addr += 0x10000000;
+
+               debug("Rank Address: 0x%llx\n", rank_addr);
+
+               if (bytes_failed) {
+                       // FIXME? the big hammer, did not even try SW WL pass2,
+                       // assume only chip reset will help
+                       debug("N%d.LMC%d.R%d: S/W write-leveling pass 1 failed\n",
+                             node, if_num, rankx);
+                       sw_wl_failed = 1;
+               } else {        /* if (bytes_failed) */
+                       // SW WL pass 1 was OK, write the settings
+                       lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
+                              wl_rank.u64);
+                       wl_rank.u64 = lmc_rd(priv,
+                                            CVMX_LMCX_WLEVEL_RANKX(rankx,
+                                                                   if_num));
+
+                       // do validity check on the delay values by running
+                       // the test 1 more time...
+                       // FIXME: we really need to check the ECC byte setting
+                       // here as well, so we need to enable ECC for this test!
+                       // if there are any errors, claim SW WL failure
+                       u64 datamask = (if_64b) ? 0xffffffffffffffffULL :
+                               0x00000000ffffffffULL;
+                       int errors;
+
+                       // do the test
+                       if (sw_wl_hw) {
+                               errors = run_best_hw_patterns(priv, if_num,
+                                                             rank_addr,
+                                                             DBTRAIN_TEST,
+                                                             NULL) & 0xff;
+                       } else {
+                               errors = test_dram_byte64(priv, if_num,
+                                                         rank_addr, datamask,
+                                                         NULL);
+                       }
+
+                       if (errors) {
+                               debug("N%d.LMC%d.R%d: Wlevel Rank Final Test errors 0x%03x\n",
+                                     node, if_num, rankx, errors);
+                               sw_wl_failed = 1;
+                       }
+               }               /* if (bytes_failed) */
+
+               // FIXME? dump the WL settings, so we get more of a clue
+               // as to what happened where
+               debug("N%d.LMC%d.R%d: Wlevel Rank %#4x, 0x%016llX  : %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %s\n",
+                     node, if_num, rankx, wl_rank.s.status, wl_rank.u64,
+                     wl_rank.s.byte8, wl_status_strings[byte_test_status[8]],
+                     wl_rank.s.byte7, wl_status_strings[byte_test_status[7]],
+                     wl_rank.s.byte6, wl_status_strings[byte_test_status[6]],
+                     wl_rank.s.byte5, wl_status_strings[byte_test_status[5]],
+                     wl_rank.s.byte4, wl_status_strings[byte_test_status[4]],
+                     wl_rank.s.byte3, wl_status_strings[byte_test_status[3]],
+                     wl_rank.s.byte2, wl_status_strings[byte_test_status[2]],
+                     wl_rank.s.byte1, wl_status_strings[byte_test_status[1]],
+                     wl_rank.s.byte0, wl_status_strings[byte_test_status[0]],
+                     (sw_wl_rank_status == WL_HARDWARE) ? "" : "(s)");
+
+               // finally, check for fatal conditions: either chip reset
+               // right here, or return error flag
+               if ((ddr_type == DDR4_DRAM && best_vref_val_count == 0) ||
+                   sw_wl_failed) {
+                       if (!ddr_disable_chip_reset) {  // do chip RESET
+                               printf("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Resetting node...\n",
+                                      node, if_num, rankx);
+                               mdelay(500);
+                               do_reset(NULL, 0, 0, NULL);
+                       } else {
+                               // return error flag so LMC init can be retried.
+                               debug("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Restarting LMC init...\n",
+                                     node, if_num, rankx);
+                               return -EAGAIN; // lets the caller retry LMC init.
+                       }
+               }
+               active_rank++;
+       }
+
+       for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+               int parameter_set = 0;
+               u64 value;
+
+               if (!(rank_mask & (1 << rankx)))
+                       continue;
+
+               wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx,
+                                                                 if_num));
+
+               for (i = 0; i < 9; ++i) {
+                       s = lookup_env(priv, "ddr%d_wlevel_rank%d_byte%d",
+                                      if_num, rankx, i);
+                       if (s) {
+                               parameter_set |= 1;
+                               value = strtoul(s, NULL, 0);
+
+                               upd_wl_rank(&wl_rank, i, value);
+                       }
+               }
+
+               s = lookup_env_ull(priv, "ddr%d_wlevel_rank%d", if_num, rankx);
+               if (s) {
+                       parameter_set |= 1;
+                       value = strtoull(s, NULL, 0);
+                       wl_rank.u64 = value;
+               }
+
+               if (parameter_set) {
+                       lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
+                              wl_rank.u64);
+                       wl_rank.u64 =
+                           lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num));
+                       display_wl(if_num, wl_rank, rankx);
+               }
+               // if there are unused entries to be filled
+               if ((rank_mask & 0x0F) != 0x0F) {
+                       if (rankx < 3) {
+                               debug("N%d.LMC%d.R%d: checking for WLEVEL_RANK unused entries.\n",
+                                     node, if_num, rankx);
+
+                               // if rank 0, write ranks 1 and 2 here if empty
+                               if (rankx == 0) {
+                                       // check that rank 1 is empty
+                                       if (!(rank_mask & (1 << 1))) {
+                                               debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
+                                                     node, if_num, rankx, 1);
+                                               lmc_wr(priv,
+                                                      CVMX_LMCX_WLEVEL_RANKX(1,
+                                                               if_num),
+                                                      wl_rank.u64);
+                                       }
+
+                                       // check that rank 2 is empty
+                                       if (!(rank_mask & (1 << 2))) {
+                                               debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
+                                                     node, if_num, rankx, 2);
+                                               lmc_wr(priv,
+                                                      CVMX_LMCX_WLEVEL_RANKX(2,
+                                                               if_num),
+                                                      wl_rank.u64);
+                                       }
+                               }
+
+                               // if rank 0, 1 or 2, write rank 3 here if empty
+                               // check that rank 3 is empty
+                               if (!(rank_mask & (1 << 3))) {
+                                       debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
+                                             node, if_num, rankx, 3);
+                                       lmc_wr(priv,
+                                              CVMX_LMCX_WLEVEL_RANKX(3,
+                                                                     if_num),
+                                              wl_rank.u64);
+                               }
+                       }
+               }
+       }
+
+       /* Enable 32-bit mode if required. */
+       cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+       cfg.cn78xx.mode32b = (!if_64b);
+       debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
+
+       /* Restore the ECC configuration */
+       if (!sw_wl_hw_default)
+               cfg.cn78xx.ecc_ena = use_ecc;
+
+       lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
+
+       return 0;
+}
+
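+/*
+ * Read back and display the per-byte DLL90 settings via LMC_DLL_CTL3, then
+ * apply any custom DLL write/read offsets from the board configuration or
+ * environment.
+ */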
+static void lmc_dll(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
+       int setting[9];
+       int i;
+
+       ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
+
+       for (i = 0; i < 9; ++i) {
+               SET_DDR_DLL_CTL3(dll90_byte_sel, ENCODE_DLL90_BYTE_SEL(i));
+               lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
+               lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
+               ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
+               setting[i] = GET_DDR_DLL_CTL3(dll90_setting);
+               debug("%d. LMC%d_DLL_CTL3[%d] = %016llx %d\n", i, if_num,
+                     GET_DDR_DLL_CTL3(dll90_byte_sel), ddr_dll_ctl3.u64,
+                     setting[i]);
+       }
+
+       debug("N%d.LMC%d: %-36s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
+             node, if_num, "DLL90 Setting 8:0",
+             setting[8], setting[7], setting[6], setting[5], setting[4],
+             setting[3], setting[2], setting[1], setting[0]);
+
+       process_custom_dll_offsets(priv, if_num, "ddr_dll_write_offset",
+                                  c_cfg->dll_write_offset,
+                                  "ddr%d_dll_write_offset_byte%d", 1);
+       process_custom_dll_offsets(priv, if_num, "ddr_dll_read_offset",
+                                  c_cfg->dll_read_offset,
+                                  "ddr%d_dll_read_offset_byte%d", 2);
+}
+
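+/* Bump a SLOT_CTLx field by "incr", saturating at the 6-bit maximum of 63 */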
+#define SLOT_CTL_INCR(csr, chip, field, incr)                          \
+       csr.chip.field = (csr.chip.field < (64 - incr)) ?               \
+               (csr.chip.field + incr) : 63
+
+#define INCR(csr, chip, field, incr)                                    \
+       csr.chip.field = (csr.chip.field < (64 - incr)) ?               \
+               (csr.chip.field + incr) : 63
+
+static void lmc_workaround_2(struct ddr_priv *priv)
+{
+       /* Workaround Errata 21063 */
+       if (octeon_is_cpuid(OCTEON_CN78XX) ||
+           octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) {
+               union cvmx_lmcx_slot_ctl0 slot_ctl0;
+               union cvmx_lmcx_slot_ctl1 slot_ctl1;
+               union cvmx_lmcx_slot_ctl2 slot_ctl2;
+               union cvmx_lmcx_ext_config ext_cfg;
+
+               slot_ctl0.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL0(if_num));
+               slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num));
+               slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num));
+
+               ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
+
+               /* When ext_cfg.s.read_ena_bprch is set add 1 */
+               if (ext_cfg.s.read_ena_bprch) {
+                       SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_init, 1);
+                       SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_l_init, 1);
+                       SLOT_CTL_INCR(slot_ctl1, cn78xx, r2w_xrank_init, 1);
+                       SLOT_CTL_INCR(slot_ctl2, cn78xx, r2w_xdimm_init, 1);
+               }
+
+               /* Always add 2 */
+               SLOT_CTL_INCR(slot_ctl1, cn78xx, w2r_xrank_init, 2);
+               SLOT_CTL_INCR(slot_ctl2, cn78xx, w2r_xdimm_init, 2);
+
+               lmc_wr(priv, CVMX_LMCX_SLOT_CTL0(if_num), slot_ctl0.u64);
+               lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64);
+               lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64);
+       }
+
+       /* Workaround Errata 21216 */
+       if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) ||
+           octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) {
+               union cvmx_lmcx_slot_ctl1 slot_ctl1;
+               union cvmx_lmcx_slot_ctl2 slot_ctl2;
+
+               slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num));
+               slot_ctl1.cn78xx.w2w_xrank_init =
+                   max(10, (int)slot_ctl1.cn78xx.w2w_xrank_init);
+               lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64);
+
+               slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num));
+               slot_ctl2.cn78xx.w2w_xdimm_init =
+                   max(10, (int)slot_ctl2.cn78xx.w2w_xdimm_init);
+               lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64);
+       }
+}
+
+static void lmc_final(struct ddr_priv *priv)
+{
+       /*
+        * 4.8.11 Final LMC Initialization
+        *
+        * Early LMC initialization, LMC write-leveling, and LMC read-leveling
+        * must be completed prior to starting this final LMC initialization.
+        *
+        * LMC hardware updates the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1,
+        * LMC(0)_SLOT_CTL2 CSRs with minimum values based on the selected
+        * readleveling and write-leveling settings. Software should not write
+        * the final LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and LMC(0)_SLOT_CTL2
+        * values until after the final read-leveling and write-leveling
+        * settings are written.
+        *
+        * Software must ensure the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and
+        * LMC(0)_SLOT_CTL2 CSR values are appropriate for this step. These CSRs
+        * select the minimum gaps between read operations and write operations
+        * of various types.
+        *
+        * Software must not reduce the values in these CSR fields below the
+        * values previously selected by the LMC hardware (during write-leveling
+        * and read-leveling steps above).
+        *
+        * All sections in this chapter may be used to derive proper settings
+        * for these registers.
+        *
+        * For minimal read latency, L2C_CTL[EF_ENA,EF_CNT] should be programmed
+        * properly. This should be done prior to the first read.
+        */
+
+       /* Clear any residual ECC errors */
+       int num_tads = 1;
+       int tad;
+       int num_mcis = 1;
+       int mci;
+
+       if (octeon_is_cpuid(OCTEON_CN78XX)) {
+               num_tads = 8;
+               num_mcis = 4;
+       } else if (octeon_is_cpuid(OCTEON_CN70XX)) {
+               num_tads = 1;
+               num_mcis = 1;
+       } else if (octeon_is_cpuid(OCTEON_CN73XX) ||
+                  octeon_is_cpuid(OCTEON_CNF75XX)) {
+               num_tads = 4;
+               num_mcis = 3;
+       }
+
+       lmc_wr(priv, CVMX_LMCX_INT(if_num), -1ULL);
+       lmc_rd(priv, CVMX_LMCX_INT(if_num));
+
+       for (tad = 0; tad < num_tads; tad++) {
+               l2c_wr(priv, CVMX_L2C_TADX_INT(tad),
+                      l2c_rd(priv, CVMX_L2C_TADX_INT(tad)));
+               debug("%-45s : (%d) 0x%08llx\n", "CVMX_L2C_TAD_INT", tad,
+                     l2c_rd(priv, CVMX_L2C_TADX_INT(tad)));
+       }
+
+       for (mci = 0; mci < num_mcis; mci++) {
+               l2c_wr(priv, CVMX_L2C_MCIX_INT(mci),
+                      l2c_rd(priv, CVMX_L2C_MCIX_INT(mci)));
+               debug("%-45s : (%d) 0x%08llx\n", "L2C_MCI_INT", mci,
+                     l2c_rd(priv, CVMX_L2C_MCIX_INT(mci)));
+       }
+
+       debug("%-45s : 0x%08llx\n", "LMC_INT",
+             lmc_rd(priv, CVMX_LMCX_INT(if_num)));
+}
+
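+/*
+ * Scrambling setup: scrambling stays disabled unless requested via the
+ * ddr_use_scramble environment variable, in which case the scramble keys
+ * are seeded from the hardware RNG. The individual SCRAMBLE_CFGx and
+ * NS_CTL values can also be overridden from the environment before being
+ * written back.
+ */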
+static void lmc_scrambling(struct ddr_priv *priv)
+{
+       // Make sure scrambling is disabled during init...
+       union cvmx_lmcx_control ctrl;
+       union cvmx_lmcx_scramble_cfg0 lmc_scramble_cfg0;
+       union cvmx_lmcx_scramble_cfg1 lmc_scramble_cfg1;
+       union cvmx_lmcx_scramble_cfg2 lmc_scramble_cfg2;
+       union cvmx_lmcx_ns_ctl lmc_ns_ctl;
+       int use_scramble = 0;   // default OFF
+       char *s;
+
+       ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
+       lmc_scramble_cfg0.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num));
+       lmc_scramble_cfg1.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num));
+       lmc_scramble_cfg2.u64 = 0;      // quiet compiler
+       if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
+               lmc_scramble_cfg2.u64 =
+                   lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num));
+       }
+       lmc_ns_ctl.u64 = lmc_rd(priv, CVMX_LMCX_NS_CTL(if_num));
+
+       s = lookup_env_ull(priv, "ddr_use_scramble");
+       if (s)
+               use_scramble = simple_strtoull(s, NULL, 0);
+
+       /* Generate random values if scrambling is needed */
+       if (use_scramble) {
+               lmc_scramble_cfg0.u64 = cvmx_rng_get_random64();
+               lmc_scramble_cfg1.u64 = cvmx_rng_get_random64();
+               lmc_scramble_cfg2.u64 = cvmx_rng_get_random64();
+               lmc_ns_ctl.s.ns_scramble_dis = 0;
+               lmc_ns_ctl.s.adr_offset = 0;
+               ctrl.s.scramble_ena = 1;
+       }
+
+       s = lookup_env_ull(priv, "ddr_scramble_cfg0");
+       if (s) {
+               lmc_scramble_cfg0.u64 = simple_strtoull(s, NULL, 0);
+               ctrl.s.scramble_ena = 1;
+       }
+       debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG0",
+             lmc_scramble_cfg0.u64);
+
+       lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), lmc_scramble_cfg0.u64);
+
+       s = lookup_env_ull(priv, "ddr_scramble_cfg1");
+       if (s) {
+               lmc_scramble_cfg1.u64 = simple_strtoull(s, NULL, 0);
+               ctrl.s.scramble_ena = 1;
+       }
+       debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG1",
+             lmc_scramble_cfg1.u64);
+       lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), lmc_scramble_cfg1.u64);
+
+       if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
+               s = lookup_env_ull(priv, "ddr_scramble_cfg2");
+               if (s) {
+                       lmc_scramble_cfg2.u64 = simple_strtoull(s, NULL, 0);
+                       ctrl.s.scramble_ena = 1;
+               }
+               debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG2",
+                     lmc_scramble_cfg2.u64);
+               lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num),
+                      lmc_scramble_cfg2.u64);
+       }
+
+       s = lookup_env_ull(priv, "ddr_ns_ctl");
+       if (s)
+               lmc_ns_ctl.u64 = simple_strtoull(s, NULL, 0);
+       debug("%-45s : 0x%016llx\n", "LMC_NS_CTL", lmc_ns_ctl.u64);
+       lmc_wr(priv, CVMX_LMCX_NS_CTL(if_num), lmc_ns_ctl.u64);
+
+       lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
+}
+
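+/*
+ * Read-leveling result for one RTT_NOM/RODT/rank combination: the
+ * RLEVEL_RANK register setting and the score assigned to it.
+ */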
+struct rl_score {
+       u64 setting;
+       int score;
+};
+
+static union cvmx_lmcx_rlevel_rankx rl_rank __section(".data");
+static union cvmx_lmcx_rlevel_ctl rl_ctl __section(".data");
+static unsigned char rodt_ctl __section(".data");
+
+static int rl_rodt_err __section(".data");
+static unsigned char rtt_nom __section(".data");
+static unsigned char rtt_idx __section(".data");
+static char min_rtt_nom_idx __section(".data");
+static char max_rtt_nom_idx __section(".data");
+static char min_rodt_ctl __section(".data");
+static char max_rodt_ctl __section(".data");
+static int rl_dbg_loops __section(".data");
+static unsigned char save_ddr2t __section(".data");
+static int rl_samples __section(".data");
+static char rl_compute __section(".data");
+static char saved_ddr__ptune __section(".data");
+static char saved_ddr__ntune __section(".data");
+static char rl_comp_offs __section(".data");
+static char saved_int_zqcs_dis __section(".data");
+static int max_adj_rl_del_inc __section(".data");
+static int print_nom_ohms __section(".data");
+static int rl_print __section(".data");
+
+#ifdef ENABLE_HARDCODED_RLEVEL
+static char part_number[21] __section(".data");
+#endif /* ENABLE_HARDCODED_RLEVEL */
+
+struct perfect_counts {
+       u16 count[9][32]; // 8+ECC byte lanes, 32 delay values each
+       u32 mask[9];      // 8+ECC, bitmask of perfect delays
+};
+
+static struct perfect_counts rank_perf[4] __section(".data");
+static struct perfect_counts rodt_perfect_counts __section(".data");
+static int pbm_lowsum_limit __section(".data");
+// FIXME: PBM skip for RODT 240 and 34
+static u32 pbm_rodt_skip __section(".data");
+
+// control rank majority processing
+static int disable_rank_majority __section(".data");
+
+// default to mask 11b ODDs for DDR4 (except 73xx), else DISABLE
+// for DDR3
+static int enable_rldelay_bump __section(".data");
+static int rldelay_bump_incr __section(".data");
+static int disable_rlv_bump_this_byte __section(".data");
+static u64 value_mask __section(".data");
+
+static struct rlevel_byte_data rl_byte[9] __section(".data");
+static int sample_loops __section(".data");
+static int max_samples __section(".data");
+static int rl_rank_errors __section(".data");
+static int rl_mask_err __section(".data");
+static int rl_nonseq_err __section(".data");
+static struct rlevel_bitmask rl_mask[9] __section(".data");
+static int rl_best_rank_score __section(".data");
+
+static int rodt_row_skip_mask __section(".data");
+
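+/*
+ * For one candidate read-ODT setting: program COMP_CTL2[RODT_CTL], run the
+ * hardware read-leveling sequence for up to max_samples iterations (with
+ * separate A-side/B-side MPR passes for DDR4 RDIMMs), and collect the
+ * per-byte bitmasks for scoring.
+ */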
+static void rodt_loop(struct ddr_priv *priv, int rankx, struct rl_score
+                     rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4])
+{
+       union cvmx_lmcx_comp_ctl2 cc2;
+       const int rl_separate_ab = 1;
+       int i;
+
+       rl_best_rank_score = DEFAULT_BEST_RANK_SCORE;
+       rl_rodt_err = 0;
+       cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
+       cc2.cn78xx.rodt_ctl = rodt_ctl;
+       lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
+       cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
+       udelay(1); /* Give it a little time to take affect */
+       if (rl_print > 1) {
+               debug("Read ODT_CTL                                  : 0x%x (%d ohms)\n",
+                     cc2.cn78xx.rodt_ctl,
+                     imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);
+       }
+
+       memset(rl_byte, 0, sizeof(rl_byte));
+       memset(&rodt_perfect_counts, 0, sizeof(rodt_perfect_counts));
+
+       // when the iterated RODT is the target RODT, take more samples...
+       max_samples = rl_samples;
+       if (rodt_ctl == default_rodt_ctl)
+               max_samples += rl_samples + 1;
+
+       for (sample_loops = 0; sample_loops < max_samples; sample_loops++) {
+               int redoing_nonseq_errs = 0;
+
+               rl_mask_err = 0;
+
+               if (!(rl_separate_ab && spd_rdimm &&
+                     ddr_type == DDR4_DRAM)) {
+                       /* Clear read-level delays */
+                       lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
+
+                       /* read-leveling */
+                       oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
+
+                       do {
+                               rl_rank.u64 =
+                                       lmc_rd(priv,
+                                              CVMX_LMCX_RLEVEL_RANKX(rankx,
+                                                                     if_num));
+                       } while (rl_rank.cn78xx.status != 3);
+               }
+
+               rl_rank.u64 =
+                       lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
+
+               // start bitmask interpretation block
+
+               memset(rl_mask, 0, sizeof(rl_mask));
+
+               if (rl_separate_ab && spd_rdimm && ddr_type == DDR4_DRAM) {
+                       union cvmx_lmcx_rlevel_rankx rl_rank_aside;
+                       union cvmx_lmcx_modereg_params0 mp0;
+
+                       /* A-side */
+                       mp0.u64 =
+                               lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
+                       mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */
+                       lmc_wr(priv,
+                              CVMX_LMCX_MODEREG_PARAMS0(if_num),
+                              mp0.u64);
+
+                       /* Clear read-level delays */
+                       lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
+
+                       /* read-leveling */
+                       oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
+
+                       do {
+                               rl_rank.u64 =
+                                       lmc_rd(priv,
+                                              CVMX_LMCX_RLEVEL_RANKX(rankx,
+                                                                     if_num));
+                       } while (rl_rank.cn78xx.status != 3);
+
+                       rl_rank.u64 =
+                               lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
+                                                                   if_num));
+
+                       rl_rank_aside.u64 = rl_rank.u64;
+
+                       rl_mask[0].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 0);
+                       rl_mask[1].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 1);
+                       rl_mask[2].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 2);
+                       rl_mask[3].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 3);
+                       rl_mask[8].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 8);
+                       /* A-side complete */
+
+                       /* B-side */
+                       mp0.u64 =
+                               lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
+                       mp0.s.mprloc = 3; /* MPR Page 0 Location 3 */
+                       lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
+                              mp0.u64);
+
+                       /* Clear read-level delays */
+                       lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
+
+                       /* read-leveling */
+                       oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
+
+                       do {
+                               rl_rank.u64 =
+                                       lmc_rd(priv,
+                                              CVMX_LMCX_RLEVEL_RANKX(rankx,
+                                                                     if_num));
+                       } while (rl_rank.cn78xx.status != 3);
+
+                       rl_rank.u64 =
+                               lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
+                                                                   if_num));
+
+                       rl_mask[4].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 4);
+                       rl_mask[5].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 5);
+                       rl_mask[6].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 6);
+                       rl_mask[7].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 7);
+                       /* B-side complete */
+
+                       upd_rl_rank(&rl_rank, 0, rl_rank_aside.s.byte0);
+                       upd_rl_rank(&rl_rank, 1, rl_rank_aside.s.byte1);
+                       upd_rl_rank(&rl_rank, 2, rl_rank_aside.s.byte2);
+                       upd_rl_rank(&rl_rank, 3, rl_rank_aside.s.byte3);
+                       /* ECC A-side */
+                       upd_rl_rank(&rl_rank, 8, rl_rank_aside.s.byte8);
+
+                       mp0.u64 =
+                               lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
+                       mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */
+                       lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
+                              mp0.u64);
+               }
+
+               /*
+                * Evaluate the quality of the read-leveling delays from the
+                * bitmasks. Also save off a software computed read-leveling
+                * mask that may be used later to qualify the delay results
+                * from Octeon.
+                */
+               for (i = 0; i < (8 + ecc_ena); ++i) {
+                       int bmerr;
+
+                       if (!(if_bytemask & (1 << i)))
+                               continue;
+                       if (!(rl_separate_ab && spd_rdimm &&
+                             ddr_type == DDR4_DRAM)) {
+                               rl_mask[i].bm =
+                                       lmc_ddr3_rl_dbg_read(priv, if_num, i);
+                       }
+                       bmerr = validate_ddr3_rlevel_bitmask(&rl_mask[i],
+                                                            ddr_type);
+                       rl_mask[i].errs = bmerr;
+                       rl_mask_err += bmerr;
+                       // count only the "perfect" bitmasks
+                       if (ddr_type == DDR4_DRAM && !bmerr) {
+                               int delay;
+                               // FIXME: for now, simple filtering:
+                               // do NOT count PBMs for RODTs in skip mask
+                               if ((1U << rodt_ctl) & pbm_rodt_skip)
+                                       continue;
+                               // FIXME: could optimize this a bit?
+                               delay = get_rl_rank(&rl_rank, i);
+                               rank_perf[rankx].count[i][delay] += 1;
+                               rank_perf[rankx].mask[i] |=
+                                       (1ULL << delay);
+                               rodt_perfect_counts.count[i][delay] += 1;
+                               rodt_perfect_counts.mask[i] |= (1ULL << delay);
+                       }
+               }
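+               // at this point, rank_perf[rankx].mask[i] has a bit set for
+               // every delay value that produced a "perfect" (error-free)
+               // bitmask on byte-lane i, and count[i][delay] records how
+               // many times each such delay was seen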
+
+               /* Set delays for unused bytes to match byte 0. */
+               for (i = 0; i < 9; ++i) {
+                       if (if_bytemask & (1 << i))
+                               continue;
+                       upd_rl_rank(&rl_rank, i, rl_rank.s.byte0);
+               }
+
+               /*
+                * Save a copy of the byte delays in physical
+                * order for sequential evaluation.
+                */
+               unpack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, rl_rank);
+
+       redo_nonseq_errs:
+
+               rl_nonseq_err  = 0;
+               if (!disable_sequential_delay_check) {
+                       for (i = 0; i < 9; ++i)
+                               rl_byte[i].sqerrs = 0;
+
+                       if ((if_bytemask & 0xff) == 0xff) {
+                               /*
+                                * Evaluate delay sequence across the whole
+                                * range of bytes for standard dimms.
+                                */
+                               /* 1=RDIMM, 5=Mini-RDIMM */
+                               if (spd_dimm_type == 1 || spd_dimm_type == 5) {
+                                       int reg_adj_del = abs(rl_byte[4].delay -
+                                                             rl_byte[5].delay);
+
+                                       /*
+                                        * Registered dimm topology routes
+                                        * from the center.
+                                        */
+                                       rl_nonseq_err +=
+                                               nonseq_del(rl_byte, 0,
+                                                          3 + ecc_ena,
+                                                          max_adj_rl_del_inc);
+                                       rl_nonseq_err +=
+                                               nonseq_del(rl_byte, 5,
+                                                          7 + ecc_ena,
+                                                          max_adj_rl_del_inc);
+                                       // byte 5 sqerrs never gets cleared
+                                       // for RDIMMs, so reset it here
+                                       rl_byte[5].sqerrs = 0;
+                                       if (reg_adj_del > 1) {
+                                               /*
+                                                * Assess proximity of bytes on
+                                                * opposite sides of register
+                                                */
+                                               rl_nonseq_err += (reg_adj_del -
+                                                                 1) *
+                                                       RLEVEL_ADJACENT_DELAY_ERROR;
+                                               // update byte 5 error
+                                               rl_byte[5].sqerrs +=
+                                                       (reg_adj_del - 1) *
+                                                       RLEVEL_ADJACENT_DELAY_ERROR;
+                                       }
+                               }
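+                               // example (illustrative): byte4 = 8 and
+                               // byte5 = 11 give reg_adj_del = 3, which
+                               // adds (3 - 1) * RLEVEL_ADJACENT_DELAY_ERROR
+                               // to the rank score and to byte 5's sqerrs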
+
+                               /* 2=UDIMM, 6=Mini-UDIMM */
+                               if (spd_dimm_type == 2 || spd_dimm_type == 6) {
+                                       /*
+                                        * Unbuffered dimm topology routes
+                                        * from end to end.
+                                        */
+                                       rl_nonseq_err += nonseq_del(rl_byte, 0,
+                                                                   7 + ecc_ena,
+                                                                   max_adj_rl_del_inc);
+                               }
+                       } else {
+                               rl_nonseq_err += nonseq_del(rl_byte, 0,
+                                                           3 + ecc_ena,
+                                                           max_adj_rl_del_inc);
+                       }
+               } /* if (! disable_sequential_delay_check) */
+
+               rl_rank_errors = rl_mask_err + rl_nonseq_err;
+
+               // print the original sample here only if we are not
+               // averaging or picking the best
+               // also do not print if we are redoing the NONSEQ score
+               // using the COMPUTED delays
+               if (!redoing_nonseq_errs && rl_samples < 2) {
+                       if (rl_print > 1) {
+                               display_rl_bm(if_num, rankx, rl_mask, ecc_ena);
+                               display_rl_bm_scores(if_num, rankx, rl_mask,
+                                                    ecc_ena);
+                               display_rl_seq_scores(if_num, rankx, rl_byte,
+                                                     ecc_ena);
+                       }
+                       display_rl_with_score(if_num, rl_rank, rankx,
+                                             rl_rank_errors);
+               }
+
+               if (rl_compute) {
+                       if (!redoing_nonseq_errs) {
+                               /* Recompute the delays based on the bitmask */
+                               for (i = 0; i < (8 + ecc_ena); ++i) {
+                                       if (!(if_bytemask & (1 << i)))
+                                               continue;
+
+                                       upd_rl_rank(&rl_rank, i,
+                                                   compute_ddr3_rlevel_delay(
+                                                           rl_mask[i].mstart,
+                                                           rl_mask[i].width,
+                                                           rl_ctl));
+                               }
+
+                               /*
+                                * Override the copy of byte delays with the
+                                * computed results.
+                                */
+                               unpack_rlevel_settings(if_bytemask, ecc_ena,
+                                                      rl_byte, rl_rank);
+
+                               redoing_nonseq_errs = 1;
+                               goto redo_nonseq_errs;
+
+                       } else {
+                               /*
+                                * now print this if already printed the
+                                * original sample
+                                */
+                               if (rl_samples < 2 || rl_print) {
+                                       display_rl_with_computed(if_num,
+                                                                rl_rank, rankx,
+                                                                rl_rank_errors);
+                               }
+                       }
+               } /* if (rl_compute) */
+
+               // end bitmask interpretation block
+
+               // if it is a better (lower) score, then keep it
+               if (rl_rank_errors < rl_best_rank_score) {
+                       rl_best_rank_score = rl_rank_errors;
+
+                       // save the new best delays and best errors
+                       for (i = 0; i < (8 + ecc_ena); ++i) {
+                               rl_byte[i].best = rl_byte[i].delay;
+                               rl_byte[i].bestsq = rl_byte[i].sqerrs;
+                               // save bitmasks and their scores as well
+                               // xlate UNPACKED index to PACKED index to
+                               // get from rl_mask
+                               rl_byte[i].bm = rl_mask[XUP(i, !!ecc_ena)].bm;
+                               rl_byte[i].bmerrs =
+                                       rl_mask[XUP(i, !!ecc_ena)].errs;
+                       }
+               }
+
+               rl_rodt_err += rl_rank_errors;
+       }
+
+       /* We recorded the best score across the averaging loops */
+       rl_score[rtt_nom][rodt_ctl][rankx].score = rl_best_rank_score;
+
+       /*
+        * Restore the delays from the best fields that go with the best
+        * score
+        */
+       for (i = 0; i < 9; ++i) {
+               rl_byte[i].delay = rl_byte[i].best;
+               rl_byte[i].sqerrs = rl_byte[i].bestsq;
+       }
+
+       rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
+
+       pack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, &rl_rank);
+
+       if (rl_samples > 1) {
+               // restore the "best" bitmasks and their scores for printing
+               for (i = 0; i < 9; ++i) {
+                       if ((if_bytemask & (1 << i)) == 0)
+                               continue;
+                       // xlate PACKED index to UNPACKED index to get from
+                       // rl_byte
+                       rl_mask[i].bm   = rl_byte[XPU(i, !!ecc_ena)].bm;
+                       rl_mask[i].errs = rl_byte[XPU(i, !!ecc_ena)].bmerrs;
+               }
+
+               // maybe print bitmasks/scores here
+               if (rl_print > 1) {
+                       display_rl_bm(if_num, rankx, rl_mask, ecc_ena);
+                       display_rl_bm_scores(if_num, rankx, rl_mask, ecc_ena);
+                       display_rl_seq_scores(if_num, rankx, rl_byte, ecc_ena);
+
+                       display_rl_with_rodt(if_num, rl_rank, rankx,
+                                            rl_score[rtt_nom][rodt_ctl][rankx].score,
+                                            print_nom_ohms,
+                                            imp_val->rodt_ohms[rodt_ctl],
+                                            WITH_RODT_BESTSCORE);
+
+                       debug("-----------\n");
+               }
+       }
+
+       rl_score[rtt_nom][rodt_ctl][rankx].setting = rl_rank.u64;
+
+       // print out the PBMs for the current RODT
+       if (ddr_type == DDR4_DRAM && rl_print > 1) { // verbosity?
+               // FIXME: change verbosity level after debug complete...
+
+               for (i = 0; i < 9; i++) {
+                       u64 temp_mask;
+                       int num_values;
+
+                       // FIXME: PBM skip for RODTs in mask
+                       if ((1U << rodt_ctl) & pbm_rodt_skip)
+                               continue;
+
+                       temp_mask = rodt_perfect_counts.mask[i];
+                       num_values = __builtin_popcountll(temp_mask);
+                       i = __builtin_ffsll(temp_mask) - 1;
+
+                       debug("N%d.LMC%d.R%d: PERFECT: RODT %3d: Byte %d: mask 0x%02llx (%d): ",
+                             node, if_num, rankx,
+                             imp_val->rodt_ohms[rodt_ctl],
+                             i, temp_mask >> i, num_values);
+
+                       while (temp_mask != 0) {
+                               i = __builtin_ffsll(temp_mask) - 1;
+                               debug("%2d(%2d) ", i,
+                                     rodt_perfect_counts.count[i][i]);
+                               temp_mask &= ~(1UL << i);
+                       } /* while (temp_mask != 0) */
+                       debug("\n");
+               }
+       }
+}
+
+static void rank_major_loop(struct ddr_priv *priv, int rankx, struct rl_score
+                           rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4])
+{
+       /* Start with an arbitrarily high score */
+       int best_rank_score = DEFAULT_BEST_RANK_SCORE;
+       int best_rank_rtt_nom = 0;
+       int best_rank_ctl = 0;
+       int best_rank_ohms = 0;
+       int best_rankx = 0;
+       int dimm_rank_mask;
+       int max_rank_score;
+       union cvmx_lmcx_rlevel_rankx saved_rl_rank;
+       int next_ohms;
+       int orankx;
+       int next_score = 0;
+       int best_byte, new_byte, temp_byte, orig_best_byte;
+       int rank_best_bytes[9];
+       int byte_sh;
+       int avg_byte;
+       int avg_diff;
+       int i;
+
+       if (!(rank_mask & (1 << rankx)))
+               return;
+
+       // some of the rank-related loops below need to operate only on
+       // the ranks of a single DIMM,
+       // so create a mask for their use here
+       if (num_ranks == 4) {
+               dimm_rank_mask = rank_mask; // should be 1111
+       } else {
+               dimm_rank_mask = rank_mask & 3; // should be 01 or 11
+               if (rankx >= 2) {
+                       // doing a rank on the second DIMM, should be
+                       // 0100 or 1100
+                       dimm_rank_mask <<= 2;
+               }
+       }
+       debug("DIMM rank mask: 0x%x, rank mask: 0x%x, rankx: %d\n",
+             dimm_rank_mask, rank_mask, rankx);
+
+       // this is the start of the BEST ROW SCORE LOOP
+
+       for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
+               rtt_nom = imp_val->rtt_nom_table[rtt_idx];
+
+               debug("N%d.LMC%d.R%d: starting RTT_NOM %d (%d)\n",
+                     node, if_num, rankx, rtt_nom,
+                     imp_val->rtt_nom_ohms[rtt_nom]);
+
+               for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
+                    --rodt_ctl) {
+                       next_ohms = imp_val->rodt_ohms[rodt_ctl];
+
+                       // skip RODT rows in mask, but *NOT* rows with too
+                       // high a score;
+                       // we will not use the skipped ones for printing or
+                       // evaluating, but we need to allow all the
+                       // non-skipped ones to be candidates for "best"
+                       if (((1 << rodt_ctl) & rodt_row_skip_mask) != 0) {
+                               debug("N%d.LMC%d.R%d: SKIPPING rodt:%d (%d) with rank_score:%d\n",
+                                     node, if_num, rankx, rodt_ctl,
+                                     next_ohms, next_score);
+                               continue;
+                       }
+
+                       // this is ROFFIX-0528
+                       for (orankx = 0; orankx < dimm_count * 4; orankx++) {
+                               // stay on the same DIMM
+                               if (!(dimm_rank_mask & (1 << orankx)))
+                                       continue;
+
+                               next_score = rl_score[rtt_nom][rodt_ctl][orankx].score;
+
+                               // always skip a higher score
+                               if (next_score > best_rank_score)
+                                       continue;
+
+                               // if scores are equal
+                               if (next_score == best_rank_score) {
+                                       // always skip lower ohms
+                                       if (next_ohms < best_rank_ohms)
+                                               continue;
+
+                                       // if same ohms
+                                       if (next_ohms == best_rank_ohms) {
+                                               // always skip the other rank(s)
+                                               if (orankx != rankx)
+                                                       continue;
+                                       }
+                                       // else next_ohms are greater,
+                                       // always choose it
+                               }
+                               // else next_score is less than current best,
+                               // so always choose it
+                               debug("N%d.LMC%d.R%d: new best score: rank %d, rodt %d(%3d), new best %d, previous best %d(%d)\n",
+                                     node, if_num, rankx, orankx, rodt_ctl, next_ohms, next_score,
+                                     best_rank_score, best_rank_ohms);
+                               best_rank_score     = next_score;
+                               best_rank_rtt_nom   = rtt_nom;
+                               //best_rank_nom_ohms  = rtt_nom_ohms;
+                               best_rank_ctl       = rodt_ctl;
+                               best_rank_ohms      = next_ohms;
+                               best_rankx          = orankx;
+                               rl_rank.u64 =
+                                       rl_score[rtt_nom][rodt_ctl][orankx].setting;
+                       }
+               }
+       }
+
+       // this is the end of the BEST ROW SCORE LOOP
+
+       // DANGER, Will Robinson!! Abort now if we did not find a best
+       // score at all...
+       if (best_rank_score == DEFAULT_BEST_RANK_SCORE) {
+               printf("N%d.LMC%d.R%d: WARNING: no best rank score found - resetting node...\n",
+                      node, if_num, rankx);
+               mdelay(500);
+               do_reset(NULL, 0, 0, NULL);
+       }
+
+       // FIXME: relative now, but still arbitrary...
+       max_rank_score = best_rank_score;
+       if (ddr_type == DDR4_DRAM) {
+               // halve the range if 2 DIMMs unless they are single rank...
+               max_rank_score += (MAX_RANK_SCORE_LIMIT / ((num_ranks > 1) ?
+                                                          dimm_count : 1));
+       } else {
+               // Since DDR3 typically has a wider score range,
+               // keep more of them always
+               max_rank_score += MAX_RANK_SCORE_LIMIT;
+       }
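+       // illustrative: for DDR4 with two dual-rank DIMMs (num_ranks > 1,
+       // dimm_count == 2), rows scoring worse than
+       // best_rank_score + MAX_RANK_SCORE_LIMIT / 2 are skipped below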
+
+       if (!ecc_ena) {
+               /* ECC is not used */
+               rl_rank.s.byte8 = rl_rank.s.byte0;
+       }
+
+       // at the end, write the best row settings to the current rank
+       lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), rl_rank.u64);
+       rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
+
+       saved_rl_rank.u64 = rl_rank.u64;
+
+       // this is the start of the PRINT LOOP
+       int pass;
+
+       // for pass==0, print current rank, pass==1 print other rank(s)
+       // this is done because we want to show each rank's RODT values
+       // together, not interleaved
+       // keep separate counters per pass - pass=0 target rank, pass=1
+       // other rank(s) on the DIMM
+       int mask_skipped[2] = {0, 0};
+       int score_skipped[2] = {0, 0};
+       int selected_rows[2] = {0, 0};
+       int zero_scores[2] = {0, 0};
+       for (pass = 0; pass < 2; pass++) {
+               for (orankx = 0; orankx < dimm_count * 4; orankx++) {
+                       // stay on the same DIMM
+                       if (!(dimm_rank_mask & (1 << orankx)))
+                               continue;
+
+                       if ((pass == 0 && orankx != rankx) ||
+                           (pass != 0 && orankx == rankx))
+                               continue;
+
+                       for (rtt_idx = min_rtt_nom_idx;
+                            rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
+                               rtt_nom = imp_val->rtt_nom_table[rtt_idx];
+                               if (dyn_rtt_nom_mask == 0) {
+                                       print_nom_ohms = -1;
+                               } else {
+                                       print_nom_ohms =
+                                               imp_val->rtt_nom_ohms[rtt_nom];
+                               }
+
+                               // cycle through all the RODT values...
+                               for (rodt_ctl = max_rodt_ctl;
+                                    rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
+                                       union cvmx_lmcx_rlevel_rankx
+                                               temp_rl_rank;
+                                       int temp_score =
+                                               rl_score[rtt_nom][rodt_ctl][orankx].score;
+                                       int skip_row;
+
+                                       temp_rl_rank.u64 =
+                                               rl_score[rtt_nom][rodt_ctl][orankx].setting;
+
+                                       // skip RODT rows in mask, or rows
+                                       // with too high a score;
+                                       // we will not use them for printing
+                                       // or evaluating...
+                                       if ((1 << rodt_ctl) &
+                                           rodt_row_skip_mask) {
+                                               skip_row = WITH_RODT_SKIPPING;
+                                               ++mask_skipped[pass];
+                                       } else if (temp_score >
+                                                  max_rank_score) {
+                                               skip_row = WITH_RODT_SKIPPING;
+                                               ++score_skipped[pass];
+                                       } else {
+                                               skip_row = WITH_RODT_BLANK;
+                                               ++selected_rows[pass];
+                                               if (temp_score == 0)
+                                                       ++zero_scores[pass];
+                                       }
+
+                                       // identify and print the BEST ROW
+                                       // when it comes up
+                                       if (skip_row == WITH_RODT_BLANK &&
+                                           best_rankx == orankx &&
+                                           best_rank_rtt_nom == rtt_nom &&
+                                           best_rank_ctl == rodt_ctl)
+                                               skip_row = WITH_RODT_BESTROW;
+
+                                       if (rl_print) {
+                                               display_rl_with_rodt(if_num,
+                                                                    temp_rl_rank, orankx, temp_score,
+                                                                    print_nom_ohms,
+                                                                    imp_val->rodt_ohms[rodt_ctl],
+                                                                    skip_row);
+                                       }
+                               }
+                       }
+               }
+       }
+       debug("N%d.LMC%d.R%d: RLROWS: selected %d+%d, zero_scores %d+%d, mask_skipped %d+%d, score_skipped %d+%d\n",
+             node, if_num, rankx, selected_rows[0], selected_rows[1],
+             zero_scores[0], zero_scores[1], mask_skipped[0], mask_skipped[1],
+             score_skipped[0], score_skipped[1]);
+       // this is the end of the PRINT LOOP
+
+       // now evaluate which bytes need adjusting
+       // collect the new byte values; first init with current best for
+       // neighbor use
+       for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) {
+               rank_best_bytes[i] = (int)(rl_rank.u64 >> byte_sh) &
+                       RLEVEL_BYTE_MSK;
+       }
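+       // as the 6-bit shift above implies, byte-lane N's delay occupies
+       // bits [6N+5:6N] of the RLEVEL_RANKX value; e.g. byte 2's delay is
+       // (rl_rank.u64 >> 12) & RLEVEL_BYTE_MSK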
+
+       // this is the start of the BEST BYTE LOOP
+
+       for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) {
+               int sum = 0, count = 0;
+               int count_less = 0, count_same = 0, count_more = 0;
+               int count_byte; // save the value we counted around
+               // for rank majority use
+               int rank_less = 0, rank_same = 0, rank_more = 0;
+               int neighbor;
+               int neigh_byte;
+
+               best_byte = rank_best_bytes[i];
+               orig_best_byte = rank_best_bytes[i];
+
+               // this is the start of the BEST BYTE AVERAGING LOOP
+
+               // validate the initial "best" byte by looking at the
+               // average of the unskipped byte-column entries
+               // we want to do this before we go further, so we can
+               // try to start with a better initial value
+               // this is the so-called "BESTBUY" patch set
+
+               for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
+                    ++rtt_idx) {
+                       rtt_nom = imp_val->rtt_nom_table[rtt_idx];
+
+                       for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
+                            --rodt_ctl) {
+                               union cvmx_lmcx_rlevel_rankx temp_rl_rank;
+                               int temp_score;
+
+                               // average over all the ranks
+                               for (orankx = 0; orankx < dimm_count * 4;
+                                    orankx++) {
+                                       // stay on the same DIMM
+                                       if (!(dimm_rank_mask & (1 << orankx)))
+                                               continue;
+
+                                       temp_score =
+                                               rl_score[rtt_nom][rodt_ctl][orankx].score;
+                                       // skip RODT rows in mask, or rows with
+                                       // too high a score;
+                                       // we will not use them for printing or
+                                       // evaluating...
+
+                                       if (!((1 << rodt_ctl) &
+                                             rodt_row_skip_mask) &&
+                                           temp_score <= max_rank_score) {
+                                               temp_rl_rank.u64 =
+                                                       rl_score[rtt_nom][rodt_ctl][orankx].setting;
+                                               temp_byte =
+                                                       (int)(temp_rl_rank.u64 >> byte_sh) &
+                                                       RLEVEL_BYTE_MSK;
+                                               sum += temp_byte;
+                                               count++;
+                                       }
+                               }
+                       }
+               }
+
+               // this is the end of the BEST BYTE AVERAGING LOOP
+
+               // FIXME: validate count and sum??
+               avg_byte = (int)divide_nint(sum, count);
+               avg_diff = best_byte - avg_byte;
+               new_byte = best_byte;
+               if (avg_diff != 0) {
+                       // bump best up/dn by 1, not necessarily all the
+                       // way to avg
+                       new_byte = best_byte + ((avg_diff > 0) ? -1 : 1);
+               }
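+               // worked example: best_byte = 12 and avg_byte = 9 give
+               // avg_diff = 3 > 0, so new_byte becomes 11 (one step toward
+               // the average, not all the way to it)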
+
+               if (rl_print) {
+                       debug("N%d.LMC%d.R%d: START:   Byte %d: best %d is different by %d from average %d, using %d.\n",
+                             node, if_num, rankx,
+                             i, best_byte, avg_diff, avg_byte, new_byte);
+               }
+               best_byte = new_byte;
+               count_byte = new_byte; // save the value we will count around
+
+               // At this point best_byte is either:
+               // 1. the original byte-column value from the best scoring
+               //    RODT row, OR
+               // 2. that value bumped toward the average of all the
+               //    byte-column values
+               //
+               // best_byte will not change from here on...
+
+               // this is the start of the BEST BYTE COUNTING LOOP
+
+               // NOTE: we do this next loop separately from above, because
+               // we count relative to "best_byte"
+               // which may have been modified by the above averaging
+               // operation...
+
+               for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
+                    ++rtt_idx) {
+                       rtt_nom = imp_val->rtt_nom_table[rtt_idx];
+
+                       for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
+                            --rodt_ctl) {
+                               union cvmx_lmcx_rlevel_rankx temp_rl_rank;
+                               int temp_score;
+
+                               for (orankx = 0; orankx < dimm_count * 4;
+                                    orankx++) { // count over all the ranks
+                                       // stay on the same DIMM
+                                       if (!(dimm_rank_mask & (1 << orankx)))
+                                               continue;
+
+                                       temp_score =
+                                               rl_score[rtt_nom][rodt_ctl][orankx].score;
+                                       // skip RODT rows in mask, or rows
+                                       // with too high a score;
+                                       // we will not use them for printing
+                                       // or evaluating...
+                                       if (((1 << rodt_ctl) &
+                                            rodt_row_skip_mask) ||
+                                           temp_score > max_rank_score)
+                                               continue;
+
+                                       temp_rl_rank.u64 =
+                                               rl_score[rtt_nom][rodt_ctl][orankx].setting;
+                                       temp_byte = (temp_rl_rank.u64 >>
+                                                    byte_sh) & RLEVEL_BYTE_MSK;
+
+                                       if (temp_byte == 0)
+                                               ;  // do not count it if illegal
+                                       else if (temp_byte == best_byte)
+                                               count_same++;
+                                       else if (temp_byte == best_byte - 1)
+                                               count_less++;
+                                       else if (temp_byte == best_byte + 1)
+                                               count_more++;
+                                       // else do not count anything more
+                                       // than 1 away from the best
+
+                                       // no rank counting if disabled
+                                       if (disable_rank_majority)
+                                               continue;
+
+                                       // FIXME? count is relative to
+                                       // best_byte; should it be rank-based?
+                                       // rank counts only on main rank
+                                       if (orankx != rankx)
+                                               continue;
+                                       else if (temp_byte == best_byte)
+                                               rank_same++;
+                                       else if (temp_byte == best_byte - 1)
+                                               rank_less++;
+                                       else if (temp_byte == best_byte + 1)
+                                               rank_more++;
+                               }
+                       }
+               }
+
+               if (rl_print) {
+                       debug("N%d.LMC%d.R%d: COUNT:   Byte %d: orig %d now %d, more %d same %d less %d (%d/%d/%d)\n",
+                             node, if_num, rankx,
+                             i, orig_best_byte, best_byte,
+                             count_more, count_same, count_less,
+                             rank_more, rank_same, rank_less);
+               }
+
+               // this is the end of the BEST BYTE COUNTING LOOP
+
+               // choose the new byte value
+               // we need to check that there is no gap greater than 2
+               // between adjacent bytes (adjacency depends on DIMM type)
+               // use the neighbor value to help decide
+               // initially, the rank_best_bytes[] will contain values from
+               // the chosen lowest score rank
+               new_byte = 0;
+
+               // neighbor is index-1 unless we are index 0 or index 8 (ECC)
+               neighbor = (i == 8) ? 3 : ((i == 0) ? 1 : i - 1);
+               neigh_byte = rank_best_bytes[neighbor];
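+               // e.g. byte 0 is compared against byte 1, byte 5 against
+               // byte 4, and the ECC byte (8) against byte 3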
+
+               // can go up or down or stay the same, so look at a numeric
+               // average to help
+               new_byte = (int)divide_nint(((count_more * (best_byte + 1)) +
+                                            (count_same * (best_byte + 0)) +
+                                            (count_less * (best_byte - 1))),
+                                           max(1, (count_more + count_same +
+                                                   count_less)));
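+               // worked example (assuming divide_nint() rounds to the
+               // nearest integer): best_byte = 10, count_more = 2,
+               // count_same = 5, count_less = 1 gives
+               // (2 * 11 + 5 * 10 + 1 * 9) / 8 = 81 / 8, i.e. new_byte = 10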
+
+               // use neighbor to help choose with average
+               if (i > 0 && (abs(neigh_byte - new_byte) > 2) &&
+                   !disable_sequential_delay_check) {
+                       // but not for byte 0
+                       int avg_pick = new_byte;
+
+                       if ((new_byte - best_byte) != 0) {
+                               // back to best, average did not get better
+                               new_byte = best_byte;
+                       } else {
+                               // avg was the same, still too far, now move
+                               // it towards the neighbor
+                               new_byte += (neigh_byte > new_byte) ? 1 : -1;
+                       }
+
+                       if (rl_print) {
+                               debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: neighbor %d too different %d from average %d, picking %d.\n",
+                                     node, if_num, rankx,
+                                     i, neighbor, neigh_byte, avg_pick,
+                                     new_byte);
+                       }
+               } else {
+                       // NOTE:
+                       // For now, we let the neighbor processing above trump
+                       // the new simple majority processing here.
+                       // This is mostly because we have seen no smoking gun
+                       // for a neighbor bad choice (yet?).
+                       // Also note that we will ALWAYS be using byte 0
+                       // majority, because of the if clause above.
+
+                       // majority is dependent on the counts, which are
+                       // relative to best_byte, so start there
+                       int maj_byte = best_byte;
+                       int rank_maj;
+                       int rank_sum;
+
+                       if (count_more > count_same &&
+                           count_more > count_less) {
+                               maj_byte++;
+                       } else if (count_less > count_same &&
+                                  count_less > count_more) {
+                               maj_byte--;
+                       }
+
+                       if (maj_byte != new_byte) {
+                               // print only when majority choice is
+                               // different from average
+                               if (rl_print) {
+                                       debug("N%d.LMC%d.R%d: MAJORTY: Byte %d: picking majority of %d over average %d.\n",
+                                             node, if_num, rankx, i, maj_byte,
+                                             new_byte);
+                               }
+                               new_byte = maj_byte;
+                       } else {
+                               if (rl_print) {
+                                       debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n",
+                                             node, if_num, rankx, i, new_byte);
+                               }
+                       }
+
+                       if (!disable_rank_majority) {
+                               // rank majority is dependent on the rank
+                               // counts, which are relative to best_byte,
+                               // so start there, and adjust according to the
+                               // rank counts majority
+                               rank_maj = best_byte;
+                               if (rank_more > rank_same &&
+                                   rank_more > rank_less) {
+                                       rank_maj++;
+                               } else if (rank_less > rank_same &&
+                                          rank_less > rank_more) {
+                                       rank_maj--;
+                               }
+                               rank_sum = rank_more + rank_same + rank_less;
+
+                               // now, let rank majority possibly rule over
+                               // the current new_byte however we got it
+                               if (rank_maj != new_byte) { // only if different
+                                       // Here is where we decide whether to
+                                       // completely apply RANK_MAJORITY or not
+                                       // ignore it if it is less than new_byte
+                                       if (rank_maj < new_byte) {
+                                               if (rl_print) {
+                                                       debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: LESS: NOT using %d over %d.\n",
+                                                             node, if_num,
+                                                             rankx, i,
+                                                             rank_maj,
+                                                             new_byte);
+                                               }
+                                       } else {
+                                               // For the moment, we do it
+                                               // ONLY when running 2-slot
+                                               // configs
+                                               //  OR when rank_sum is big
+                                               // enough
+                                               if (dimm_count > 1 ||
+                                                   rank_sum > 2) {
+                                                       // print only when rank
+                                                       // majority choice is
+                                                       // selected
+                                                       if (rl_print) {
+                                                               debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: picking %d over %d.\n",
+                                                                     node,
+                                                                     if_num,
+                                                                     rankx,
+                                                                     i,
+                                                                     rank_maj,
+                                                                     new_byte);
+                                                       }
+                                                       new_byte = rank_maj;
+                                               } else {
+                                                       // FIXME: print some
+                                                       // info when we could
+                                                       // have chosen RANKMAJ
+                                                       // but did not
+                                                       if (rl_print) {
+                                                               debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: NOT using %d over %d (best=%d,sum=%d).\n",
+                                                                     node,
+                                                                     if_num,
+                                                                     rankx,
+                                                                     i,
+                                                                     rank_maj,
+                                                                     new_byte,
+                                                                     best_byte,
+                                                                     rank_sum);
+                                                       }
+                                               }
+                                       }
+                               }
+                       } /* if (!disable_rank_majority) */
+               }
+               // one last check:
+               // if new_byte is still count_byte, BUT there was no count
+               // for that value, go back to the original best byte from
+               // the best row
+               if (new_byte == count_byte && count_same == 0) {
+                       new_byte = orig_best_byte;
+                       if (rl_print) {
+                               debug("N%d.LMC%d.R%d: FAILSAF: Byte %d: going back to original %d.\n",
+                                     node, if_num, rankx, i, new_byte);
+                       }
+               }
+               // Look at counts for "perfect" bitmasks (PBMs) if we had
+               // any for this byte-lane.
+               // Remember, we only counted for DDR4, so zero means none
+               // or DDR3, and we bypass this...
+               value_mask = rank_perf[rankx].mask[i];
+               disable_rlv_bump_this_byte = 0;
+
+               if (value_mask != 0 && rl_ctl.cn78xx.offset == 1) {
+                       int i, delay_count, delay_max = 0, del_val = 0;
+                       int num_values = __builtin_popcountll(value_mask);
+                       int sum_counts = 0;
+                       u64 temp_mask = value_mask;
+
+                       disable_rlv_bump_this_byte = 1;
+                       i = __builtin_ffsll(temp_mask) - 1;
+                       if (rl_print)
+                               debug("N%d.LMC%d.R%d: PERFECT: Byte %d: OFF1: mask 0x%02llx (%d): ",
+                                     node, if_num, rankx, i, value_mask >> i,
+                                     num_values);
+
+                       while (temp_mask != 0) {
+                               i = __builtin_ffsll(temp_mask) - 1;
+                               delay_count = rank_perf[rankx].count[i][i];
+                               sum_counts += delay_count;
+                               if (rl_print)
+                                       debug("%2d(%2d) ", i, delay_count);
+                               if (delay_count >= delay_max) {
+                                       delay_max = delay_count;
+                                       del_val = i;
+                               }
+                               temp_mask &= ~(1UL << i);
+                       } /* while (temp_mask != 0) */
+
+                       // if sum_counts is small, just use NEW_BYTE
+                       if (sum_counts < pbm_lowsum_limit) {
+                               if (rl_print)
+                                       debug(": LOWSUM (%2d), choose ORIG ",
+                                             sum_counts);
+                               del_val = new_byte;
+                               delay_max = rank_perf[rankx].count[i][del_val];
+                       }
+
+                       // finish printing here...
+                       if (rl_print) {
+                               debug(": USING %2d (%2d) D%d\n", del_val,
+                                     delay_max, disable_rlv_bump_this_byte);
+                       }
+
+                       new_byte = del_val; // override with best PBM choice
+
+               } else if ((value_mask != 0) && (rl_ctl.cn78xx.offset == 2)) {
+                       int i, delay_count, del_val;
+                       int num_values = __builtin_popcountll(value_mask);
+                       int sum_counts = 0;
+                       u64 temp_mask = value_mask;
+
+                       i = __builtin_ffsll(temp_mask) - 1;
+                       if (rl_print)
+                               debug("N%d.LMC%d.R%d: PERFECT: Byte %d: mask 0x%02llx (%d): ",
+                                     node, if_num, rankx, i, value_mask >> i,
+                                     num_values);
+                       while (temp_mask != 0) {
+                               i = __builtin_ffsll(temp_mask) - 1;
+                               delay_count = rank_perf[rankx].count[i][i];
+                               sum_counts += delay_count;
+                               if (rl_print)
+                                       debug("%2d(%2d) ", i, delay_count);
+                               temp_mask &= ~(1UL << i);
+                       } /* while (temp_mask != 0) */
+
+                       del_val = __builtin_ffsll(value_mask) - 1;
+                       delay_count =
+                               rank_perf[rankx].count[i][del_val];
+
+                       // overkill, normally only 1-4 bits
+                       i = (value_mask >> del_val) & 0x1F;
+
+                       // if sum_counts is small, treat as special and use
+                       // NEW_BYTE
+                       if (sum_counts < pbm_lowsum_limit) {
+                               if (rl_print)
+                                       debug(": LOWSUM (%2d), choose ORIG",
+                                             sum_counts);
+                               i = 99; // SPECIAL case...
+                       }
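+                       // the switch below keys on the low 5 bits of the
+                       // perfect-delay mask shifted down to its lowest set
+                       // bit; e.g. (illustrative) perfect delays at d, d+2
+                       // and d+3 give i = 01101b = 0x0D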
+
+                       switch (i) {
+                       case 0x01 /* 00001b */:
+                               // allow BUMP
+                               break;
+
+                       case 0x13 /* 10011b */:
+                       case 0x0B /* 01011b */:
+                       case 0x03 /* 00011b */:
+                               del_val += 1; // take the second
+                               disable_rlv_bump_this_byte = 1; // allow no BUMP
+                               break;
+
+                       case 0x0D /* 01101b */:
+                       case 0x05 /* 00101b */:
+                               // test count of lowest and all
+                               if (delay_count >= 5 || sum_counts <= 5)
+                                       del_val += 1; // take the hole
+                               else
+                                       del_val += 2; // take the next set
+                               disable_rlv_bump_this_byte = 1; // allow no BUMP
+                               break;
+
+                       case 0x0F /* 01111b */:
+                       case 0x17 /* 10111b */:
+                       case 0x07 /* 00111b */:
+                               del_val += 1; // take the second
+                               if (delay_count < 5) { // lowest count is small
+                                       int second =
+                                               rank_perf[rankx].count[i][del_val];
+                                       int third =
+                                               rank_perf[rankx].count[i][del_val + 1];
+                                       // test if middle is more than 1 OR
+                                       // top is more than 1;
+                                       // this means if they are BOTH 1,
+                                       // then we keep the second...
+                                       if (second > 1 || third > 1) {
+                                               // if middle is small OR top
+                                               // is large
+                                               if (second < 5 ||
+                                                   third > 1) {
+                                                       // take the top
+                                                       del_val += 1;
+                                                       if (rl_print)
+                                                               debug(": TOP7 ");
+                                               }
+                                       }
+                               }
+                               disable_rlv_bump_this_byte = 1; // allow no BUMP
+                               break;
+
+                       default: // all others...
+                               if (rl_print)
+                                       debug(": ABNORMAL, choose ORIG");
+
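+                               /* fall through to the "special" case */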
+                       case 99: // special
+                                // FIXME: choose original choice?
+                               del_val = new_byte;
+                               disable_rlv_bump_this_byte = 1; // allow no BUMP
+                               break;
+                       }
+                       delay_count =
+                               rank_perf[rankx].count[i][del_val];
+
+                       // finish printing here...
+                       if (rl_print)
+                               debug(": USING %2d (%2d) D%d\n", del_val,
+                                     delay_count, disable_rlv_bump_this_byte);
+                       new_byte = del_val; // override with best PBM choice
+               } else {
+                       if (ddr_type == DDR4_DRAM) { // only report when DDR4
+                               // FIXME: remove or increase VBL for this
+                               // output...
+                               if (rl_print)
+                                       debug("N%d.LMC%d.R%d: PERFECT: Byte %d: ZERO PBMs, USING %d\n",
+                                             node, if_num, rankx, i,
+                                             new_byte);
+                               // prevent ODD bump, rely on original
+                               disable_rlv_bump_this_byte = 1;
+                       }
+               } /* if (value_mask != 0) */
+
+               // optionally bump the delay value
+               if (enable_rldelay_bump && !disable_rlv_bump_this_byte) {
+                       if ((new_byte & enable_rldelay_bump) ==
+                           enable_rldelay_bump) {
+                               int bump_value = new_byte + rldelay_bump_incr;
+
+                               if (rl_print) {
+                                       debug("N%d.LMC%d.R%d: RLVBUMP: Byte %d: CHANGING %d to %d (%s)\n",
+                                             node, if_num, rankx, i,
+                                             new_byte, bump_value,
+                                             (value_mask &
+                                              (1 << bump_value)) ?
+                                             "PBM" : "NOPBM");
+                               }
+                               new_byte = bump_value;
+                       }
+               }
+
+               // last checks for count-related purposes
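+               // (note: each adjustment below is applied inside its rl_print
+               // block, so it only takes effect when verbose printing is on)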
+               if (new_byte == best_byte && count_more > 0 &&
+                   count_less == 0) {
+                       // we really should take best_byte + 1
+                       if (rl_print) {
+                               debug("N%d.LMC%d.R%d: CADJMOR: Byte %d: CHANGING %d to %d\n",
+                                     node, if_num, rankx, i,
+                                     new_byte, best_byte + 1);
+                               new_byte = best_byte + 1;
+                       }
+               } else if ((new_byte < best_byte) && (count_same > 0)) {
+                       // we really should take best_byte
+                       if (rl_print) {
+                               debug("N%d.LMC%d.R%d: CADJSAM: Byte %d: CHANGING %d to %d\n",
+                                     node, if_num, rankx, i,
+                                     new_byte, best_byte);
+                               new_byte = best_byte;
+                       }
+               } else if (new_byte > best_byte) {
+                       if ((new_byte == (best_byte + 1)) &&
+                           count_more == 0 && count_less > 0) {
+                               // we really should take best_byte
+                               if (rl_print) {
+                                       debug("N%d.LMC%d.R%d: CADJLE1: Byte %d: CHANGING %d to %d\n",
+                                             node, if_num, rankx, i,
+                                             new_byte, best_byte);
+                                       new_byte = best_byte;
+                               }
+                       } else if ((new_byte >= (best_byte + 2)) &&
+                                  ((count_more > 0) || (count_same > 0))) {
+                               if (rl_print) {
+                                       debug("N%d.LMC%d.R%d: CADJLE2: Byte %d: CHANGING %d to %d\n",
+                                             node, if_num, rankx, i,
+                                             new_byte, best_byte + 1);
+                                       new_byte = best_byte + 1;
+                               }
+                       }
+               }
+
+               if (rl_print) {
+                       debug("N%d.LMC%d.R%d: SUMMARY: Byte %d: orig %d now %d, more %d same %d less %d, using %d\n",
+                             node, if_num, rankx, i, orig_best_byte,
+                             best_byte, count_more, count_same, count_less,
+                             new_byte);
+               }
+
+               // update the byte with the new value (NOTE: orig value in
+               // the CSR may not be current "best")
+               upd_rl_rank(&rl_rank, i, new_byte);
+
+               // save new best for neighbor use
+               rank_best_bytes[i] = new_byte;
+       } /* for (i = 0; i < 8+ecc_ena; i++) */
+
+       ////////////////// this is the end of the BEST BYTE LOOP
+
+       if (saved_rl_rank.u64 != rl_rank.u64) {
+               lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num),
+                      rl_rank.u64);
+               rl_rank.u64 = lmc_rd(priv,
+                                    CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
+               debug("Adjusting Read-Leveling per-RANK settings.\n");
+       } else {
+               debug("Not Adjusting Read-Leveling per-RANK settings.\n");
+       }
+       display_rl_with_final(if_num, rl_rank, rankx);
+
+       // FIXME: does this help make the output a little easier to focus?
+       if (rl_print > 0)
+               debug("-----------\n");
+
+#define RLEVEL_RANKX_EXTRAS_INCR  0
+       // if there are unused entries to be filled
+       if ((rank_mask & 0x0f) != 0x0f) {
+               // copy the current rank
+               union cvmx_lmcx_rlevel_rankx temp_rl_rank = rl_rank;
+
+               if (rankx < 3) {
+#if RLEVEL_RANKX_EXTRAS_INCR > 0
+                       int byte, delay;
+
+                       // modify the copy in prep for writing to empty slot(s)
+                       for (byte = 0; byte < 9; byte++) {
+                               delay = get_rl_rank(&temp_rl_rank, byte) +
+                                       RLEVEL_RANKX_EXTRAS_INCR;
+                               if (delay > RLEVEL_BYTE_MSK)
+                                       delay = RLEVEL_BYTE_MSK;
+                               upd_rl_rank(&temp_rl_rank, byte, delay);
+                       }
+#endif
+
+                       // if rank 0, write rank 1 and rank 2 here if empty
+                       if (rankx == 0) {
+                               // check that rank 1 is empty
+                               if (!(rank_mask & (1 << 1))) {
+                                       debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
+                                             node, if_num, rankx, 1);
+                                       lmc_wr(priv,
+                                              CVMX_LMCX_RLEVEL_RANKX(1,
+                                                                     if_num),
+                                              temp_rl_rank.u64);
+                               }
+
+                               // check that rank 2 is empty
+                               if (!(rank_mask & (1 << 2))) {
+                                       debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
+                                             node, if_num, rankx, 2);
+                                       lmc_wr(priv,
+                                              CVMX_LMCX_RLEVEL_RANKX(2,
+                                                                     if_num),
+                                              temp_rl_rank.u64);
+                               }
+                       }
+
+                       // if ranks 0, 1 or 2, write rank 3 here if empty
+                       // check that rank 3 is empty
+                       if (!(rank_mask & (1 << 3))) {
+                               debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
+                                     node, if_num, rankx, 3);
+                               lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(3, if_num),
+                                      temp_rl_rank.u64);
+                       }
+               }
+       }
+}
+
+static void lmc_read_leveling(struct ddr_priv *priv)
+{
+       struct rl_score rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4];
+       union cvmx_lmcx_control ctl;
+       union cvmx_lmcx_config cfg;
+       int rankx;
+       char *s;
+       int i;
+
+       /*
+        * 4.8.10 LMC Read Leveling
+        *
+        * LMC supports an automatic read-leveling separately per byte-lane
+        * using the DDR3 multipurpose register predefined pattern for system
+        * calibration defined in the JEDEC DDR3 specifications.
+        *
+        * The DDR PLL, LMC CK, and LMC DRESET initializations, as well as the
+        * early LMC initializations, must be completed prior to starting this
+        * LMC read-leveling sequence.
+        *
+        * Software could simply write the desired read-leveling values into
+        * LMC(0)_RLEVEL_RANK(0..3). This section describes a sequence that uses
+        * LMC's automatic read-leveling capabilities.
+        *
+        * When LMC does the read-leveling sequence for a rank, it first enables
+        * the DDR3 multipurpose register predefined pattern for system
+        * calibration on the selected DRAM rank via a DDR3 MR3 write, then
+        * executes 64 RD operations at different internal delay settings, then
+        * disables the predefined pattern via another DDR3 MR3 write
+        * operation. LMC determines the pass or fail of each of the 64 settings
+        * independently for each byte lane, then writes appropriate
+        * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] values for the rank.
+        *
+        * After read-leveling for a rank, software can read the 64 pass/fail
+        * indications for one byte lane via LMC(0)_RLEVEL_DBG[BITMASK].
+        * Software can observe all pass/fail results for all byte lanes in a
+        * rank via separate read-leveling sequences on the rank with different
+        * LMC(0)_RLEVEL_CTL[BYTE] values.
+        *
+        * The 64 pass/fail results will typically have failures for the low
+        * delays, followed by a run of some passing settings, followed by more
+        * failures in the remaining high delays.  LMC sets
+        * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] to one of the passing settings.
+        * First, LMC selects the longest run of successes in the 64 results.
+        * (In the unlikely event that there is more than one longest run, LMC
+        * selects the first one.) Then if LMC(0)_RLEVEL_CTL[OFFSET_EN] = 1 and
+        * the selected run has more than LMC(0)_RLEVEL_CTL[OFFSET] successes,
+        * LMC selects the last passing setting in the run minus
+        * LMC(0)_RLEVEL_CTL[OFFSET]. Otherwise LMC selects the middle setting
+        * in the run (rounding earlier when necessary). We expect the
+        * read-leveling sequence to produce good results with the reset values
+        * LMC(0)_RLEVEL_CTL[OFFSET_EN] = 1 and LMC(0)_RLEVEL_CTL[OFFSET] = 2.
+        *
+        * The read-leveling sequence has the following steps:
+        *
+        * 1. Select desired LMC(0)_RLEVEL_CTL[OFFSET_EN,OFFSET,BYTE] settings.
+        *    Do the remaining substeps 2-4 separately for each rank i with
+        *    attached DRAM.
+        *
+        * 2. Without changing any other fields in LMC(0)_CONFIG,
+        *
+        *    o write LMC(0)_SEQ_CTL[SEQ_SEL] to select read-leveling
+        *
+        *    o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
+        *
+        *    o write LMC(0)_SEQ_CTL[INIT_START] = 1
+        *
+        *    This initiates the previously-described read-leveling.
+        *
+        * 3. Wait until LMC(0)_RLEVEL_RANKi[STATUS] != 2
+        *
+        *    LMC will have updated LMC(0)_RLEVEL_RANKi[BYTE*] for all byte
+        *    lanes at this point.
+        *
+        *    If ECC DRAM is not present (i.e. when DRAM is not attached to the
+        *    DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and
+        *    DDR_DQ<35:32> chip signals), write LMC(0)_RLEVEL_RANK*[BYTE8] =
+        *    LMC(0)_RLEVEL_RANK*[BYTE0]. Write LMC(0)_RLEVEL_RANK*[BYTE4] =
+        *    LMC(0)_RLEVEL_RANK*[BYTE0].
+        *
+        * 4. If desired, consult LMC(0)_RLEVEL_DBG[BITMASK] and compare to
+        *    LMC(0)_RLEVEL_RANKi[BYTE*] for the lane selected by
+        *    LMC(0)_RLEVEL_CTL[BYTE]. If desired, modify
+        *    LMC(0)_RLEVEL_CTL[BYTE] to a new value and repeat so that all
+        *    BITMASKs can be observed.
+        *
+        * 5. Initialize LMC(0)_RLEVEL_RANK* values for all unused ranks.
+        *
+        *    Let rank i be a rank with attached DRAM.
+        *
+        *    For all ranks j that do not have attached DRAM, set
+        *    LMC(0)_RLEVEL_RANKj = LMC(0)_RLEVEL_RANKi.
+        *
+        * This read-leveling sequence can help select the proper CN70XX ODT
+        * resistance value (LMC(0)_COMP_CTL2[RODT_CTL]). A hardware-generated
+        * LMC(0)_RLEVEL_RANKi[BYTEj] value (for a used byte lane j) that is
+        * drastically different from a neighboring LMC(0)_RLEVEL_RANKi[BYTEk]
+        * (for a used byte lane k) can indicate that the CN70XX ODT value is
+        * bad. It is possible to simultaneously optimize both
+        * LMC(0)_COMP_CTL2[RODT_CTL] and LMC(0)_RLEVEL_RANKn[BYTE*] values by
+        * performing this read-leveling sequence for several
+        * LMC(0)_COMP_CTL2[RODT_CTL] values and selecting the one with the
+        * best LMC(0)_RLEVEL_RANKn[BYTE*] profile for the ranks.
+        */
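+
+       /*
+        * Minimal sketch of how substeps 2-3 above map onto register accesses
+        * for a single rank (illustrative only -- the driver actually starts
+        * the sequence through ddr_init_seq() below, and the exact union
+        * field names and SEQ_SEL encoding shown here are assumptions, not
+        * taken from this file):
+        *
+        *      cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+        *      cfg.cn78xx.rankmask = 1 << rankx;
+        *      lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
+        *      seq_ctl.cn78xx.seq_sel = 1;        // assumed: read-leveling
+        *      seq_ctl.cn78xx.init_start = 1;
+        *      lmc_wr(priv, CVMX_LMCX_SEQ_CTL(if_num), seq_ctl.u64);
+        *      do {
+        *              rl_rank.u64 = lmc_rd(priv,
+        *                              CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
+        *      } while (rl_rank.cn78xx.status == 2);
+        */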
+
+       rl_rodt_err = 0;
+       rl_dbg_loops = 1;
+       saved_int_zqcs_dis = 0;
+       max_adj_rl_del_inc = 0;
+       rl_print = RLEVEL_PRINTALL_DEFAULT;
+
+#ifdef ENABLE_HARDCODED_RLEVEL
+       char part_number[21] = {0};
+#endif /* ENABLE_HARDCODED_RLEVEL */
+
+       pbm_lowsum_limit = 5; // FIXME: is this a good default?
+       // FIXME: PBM skip for RODT 240 and 34
+       pbm_rodt_skip = (1U << ddr4_rodt_ctl_240_ohm) |
+               (1U << ddr4_rodt_ctl_34_ohm);
+
+       disable_rank_majority = 0; // control rank majority processing
+
+       // default to mask 11b (odd delays) for DDR4, mask 01b for CN73XX,
+       // else DISABLE for DDR3
+       rldelay_bump_incr = 0;
+       disable_rlv_bump_this_byte = 0;
+
+       enable_rldelay_bump = (ddr_type == DDR4_DRAM) ?
+               ((octeon_is_cpuid(OCTEON_CN73XX)) ? 1 : 3) : 0;
+
+       s = lookup_env(priv, "ddr_disable_rank_majority");
+       if (s)
+               disable_rank_majority = !!simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_pbm_lowsum_limit");
+       if (s)
+               pbm_lowsum_limit = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_pbm_rodt_skip");
+       if (s)
+               pbm_rodt_skip = simple_strtoul(s, NULL, 0);
+       memset(rank_perf, 0, sizeof(rank_perf));
+
+       ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
+       save_ddr2t = ctl.cn78xx.ddr2t;
+
+       cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+       ecc_ena = cfg.cn78xx.ecc_ena;
+
+       s = lookup_env(priv, "ddr_rlevel_2t");
+       if (s)
+               ctl.cn78xx.ddr2t = simple_strtoul(s, NULL, 0);
+
+       lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
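+       /* the 2T setting saved above is restored after leveling completes */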
+
+       debug("LMC%d: Performing Read-Leveling\n", if_num);
+
+       rl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num));
+
+       rl_samples = c_cfg->rlevel_average_loops;
+       if (rl_samples == 0) {
+               rl_samples = RLEVEL_SAMPLES_DEFAULT;
+               // up the samples for these cases
+               if (dimm_count == 1 || num_ranks == 1)
+                       rl_samples = rl_samples * 2 + 1;
+       }
+
+       rl_compute = c_cfg->rlevel_compute;
+       rl_ctl.cn78xx.offset_en = c_cfg->offset_en;
+       rl_ctl.cn78xx.offset    = spd_rdimm
+               ? c_cfg->offset_rdimm
+               : c_cfg->offset_udimm;
+
+       int value = 1; // should ALWAYS be set
+
+       s = lookup_env(priv, "ddr_rlevel_delay_unload");
+       if (s)
+               value = !!simple_strtoul(s, NULL, 0);
+       rl_ctl.cn78xx.delay_unload_0 = value;
+       rl_ctl.cn78xx.delay_unload_1 = value;
+       rl_ctl.cn78xx.delay_unload_2 = value;
+       rl_ctl.cn78xx.delay_unload_3 = value;
+
+       // use OR_DIS=1 to try for better results
+       rl_ctl.cn78xx.or_dis = 1;
+
+       /*
+        * If we will be switching to 32-bit mode, level based on only
+        * four bits, because there are only 4 ECC bits.
+        */
+       rl_ctl.cn78xx.bitmask = (if_64b) ? 0xFF : 0x0F;
+
+       // allow overrides
+       s = lookup_env(priv, "ddr_rlevel_ctl_or_dis");
+       if (s)
+               rl_ctl.cn78xx.or_dis = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_rlevel_ctl_bitmask");
+       if (s)
+               rl_ctl.cn78xx.bitmask = simple_strtoul(s, NULL, 0);
+
+       rl_comp_offs = spd_rdimm
+               ? c_cfg->rlevel_comp_offset_rdimm
+               : c_cfg->rlevel_comp_offset_udimm;
+       s = lookup_env(priv, "ddr_rlevel_comp_offset");
+       if (s)
+               rl_comp_offs = strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_rlevel_offset");
+       if (s)
+               rl_ctl.cn78xx.offset   = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_rlevel_offset_en");
+       if (s)
+               rl_ctl.cn78xx.offset_en   = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_rlevel_ctl");
+       if (s)
+               rl_ctl.u64   = simple_strtoul(s, NULL, 0);
+
+       lmc_wr(priv,
+              CVMX_LMCX_RLEVEL_CTL(if_num),
+              rl_ctl.u64);
+
+       // do this here so we can look at final RLEVEL_CTL[offset] setting...
+       s = lookup_env(priv, "ddr_enable_rldelay_bump");
+       if (s) {
+               // also use as mask bits
+               enable_rldelay_bump = strtoul(s, NULL, 0);
+       }
+
+       if (enable_rldelay_bump != 0)
+               rldelay_bump_incr = (rl_ctl.cn78xx.offset == 1) ? -1 : 1;
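+       /*
+        * enable_rldelay_bump doubles as a bit mask: later, a per-byte delay
+        * value that has all of the mask bits set is nudged by
+        * rldelay_bump_incr (downward when RLEVEL_CTL[OFFSET] is 1,
+        * upward otherwise).
+        */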
+
+       s = lookup_env(priv, "ddr%d_rlevel_debug_loops", if_num);
+       if (s)
+               rl_dbg_loops = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_rtt_nom_auto");
+       if (s)
+               ddr_rtt_nom_auto = !!simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_rlevel_average");
+       if (s)
+               rl_samples = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_rlevel_compute");
+       if (s)
+               rl_compute = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_rlevel_printall");
+       if (s)
+               rl_print = simple_strtoul(s, NULL, 0);
+
+       debug("RLEVEL_CTL                                    : 0x%016llx\n",
+             rl_ctl.u64);
+       debug("RLEVEL_OFFSET                                 : %6d\n",
+             rl_ctl.cn78xx.offset);
+       debug("RLEVEL_OFFSET_EN                              : %6d\n",
+             rl_ctl.cn78xx.offset_en);
+
+       /*
+        * The purpose for the indexed table is to sort the settings
+        * by the ohm value to simplify the testing when incrementing
+        * through the settings.  (index => ohms) 1=120, 2=60, 3=40,
+        * 4=30, 5=20
+        */
+       min_rtt_nom_idx = (c_cfg->min_rtt_nom_idx == 0) ?
+               1 : c_cfg->min_rtt_nom_idx;
+       max_rtt_nom_idx = (c_cfg->max_rtt_nom_idx == 0) ?
+               5 : c_cfg->max_rtt_nom_idx;
+
+       min_rodt_ctl = (c_cfg->min_rodt_ctl == 0) ? 1 : c_cfg->min_rodt_ctl;
+       max_rodt_ctl = (c_cfg->max_rodt_ctl == 0) ? 5 : c_cfg->max_rodt_ctl;
+
+       s = lookup_env(priv, "ddr_min_rodt_ctl");
+       if (s)
+               min_rodt_ctl = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_max_rodt_ctl");
+       if (s)
+               max_rodt_ctl = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_min_rtt_nom_idx");
+       if (s)
+               min_rtt_nom_idx = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_max_rtt_nom_idx");
+       if (s)
+               max_rtt_nom_idx = simple_strtoul(s, NULL, 0);
+
+#ifdef ENABLE_HARDCODED_RLEVEL
+       if (c_cfg->rl_tbl) {
+               /* Check for hard-coded read-leveling settings */
+               get_dimm_part_number(part_number, &dimm_config_table[0],
+                                    0, ddr_type);
+               for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+                       if (!(rank_mask & (1 << rankx)))
+                               continue;
+
+                       rl_rank.u64 = lmc_rd(priv,
+                                            CVMX_LMCX_RLEVEL_RANKX(rankx,
+                                                                   if_num));
+
+                       i = 0;
+                       while (c_cfg->rl_tbl[i].part) {
+                               debug("DIMM part number:\"%s\", SPD: \"%s\"\n",
+                                     c_cfg->rl_tbl[i].part, part_number);
+                               if ((strcmp(part_number,
+                                           c_cfg->rl_tbl[i].part) == 0) &&
+                                   (abs(c_cfg->rl_tbl[i].speed -
+                                        2 * ddr_hertz / (1000 * 1000)) < 10)) {
+                                       debug("Using hard-coded read leveling for DIMM part number: \"%s\"\n",
+                                             part_number);
+                                       rl_rank.u64 =
+                                               c_cfg->rl_tbl[i].rl_rank[if_num][rankx];
+                                       lmc_wr(priv,
+                                              CVMX_LMCX_RLEVEL_RANKX(rankx,
+                                                                     if_num),
+                                              rl_rank.u64);
+                                       rl_rank.u64 =
+                                               lmc_rd(priv,
+                                                      CVMX_LMCX_RLEVEL_RANKX(rankx,
+                                                                             if_num));
+                                       display_rl(if_num, rl_rank, rankx);
+                                       /* Disable h/w read-leveling */
+                                       rl_dbg_loops = 0;
+                                       break;
+                               }
+                               ++i;
+                       }
+               }
+       }
+#endif /* ENABLE_HARDCODED_RLEVEL */
+
+       max_adj_rl_del_inc = c_cfg->maximum_adjacent_rlevel_delay_increment;
+       s = lookup_env(priv, "ddr_maximum_adjacent_rlevel_delay_increment");
+       if (s)
+               max_adj_rl_del_inc = strtoul(s, NULL, 0);
+
+       while (rl_dbg_loops--) {
+               union cvmx_lmcx_modereg_params1 mp1;
+               union cvmx_lmcx_comp_ctl2 cc2;
+
+               /* Initialize the error scoreboard */
+               memset(rl_score, 0, sizeof(rl_score));
+
+               cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
+               saved_ddr__ptune = cc2.cn78xx.ddr__ptune;
+               saved_ddr__ntune = cc2.cn78xx.ddr__ntune;
+
+               /* Disable dynamic compensation settings */
+               if (rl_comp_offs != 0) {
+                       cc2.cn78xx.ptune = saved_ddr__ptune;
+                       cc2.cn78xx.ntune = saved_ddr__ntune;
+
+                       /*
+                        * Round up the ptune calculation to bias the odd
+                        * cases toward ptune
+                        */
+                       cc2.cn78xx.ptune += divide_roundup(rl_comp_offs, 2);
+                       cc2.cn78xx.ntune -= rl_comp_offs / 2;
+
+                       ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
+                       saved_int_zqcs_dis = ctl.s.int_zqcs_dis;
+                       /* Disable ZQCS while in bypass. */
+                       ctl.s.int_zqcs_dis = 1;
+                       lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
+
+                       cc2.cn78xx.byp = 1; /* Enable bypass mode */
+                       lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
+                       lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
+                       /* Read again */
+                       cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
+                       debug("DDR__PTUNE/DDR__NTUNE                         : %d/%d\n",
+                             cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune);
+               }
+
+               mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
+
+               for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
+                    ++rtt_idx) {
+                       rtt_nom = imp_val->rtt_nom_table[rtt_idx];
+
+                       /*
+                        * When the read ODT mask is zero, the dyn_rtt_nom_mask
+                        * is zero, and RTT_NOM will not be changing during
+                        * read-leveling.  Since the value is fixed we only need
+                        * to test it once.
+                        */
+                       if (dyn_rtt_nom_mask == 0) {
+                               // flag not to print NOM ohms
+                               print_nom_ohms = -1;
+                       } else {
+                               if (dyn_rtt_nom_mask & 1)
+                                       mp1.s.rtt_nom_00 = rtt_nom;
+                               if (dyn_rtt_nom_mask & 2)
+                                       mp1.s.rtt_nom_01 = rtt_nom;
+                               if (dyn_rtt_nom_mask & 4)
+                                       mp1.s.rtt_nom_10 = rtt_nom;
+                               if (dyn_rtt_nom_mask & 8)
+                                       mp1.s.rtt_nom_11 = rtt_nom;
+                               // FIXME? rank 0 ohms always?
+                               print_nom_ohms =
+                                       imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00];
+                       }
+
+                       lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num),
+                              mp1.u64);
+
+                       if (print_nom_ohms >= 0 && rl_print > 1) {
+                               debug("\n");
+                               debug("RTT_NOM     %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
+                                     imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
+                                     imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
+                                     imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
+                                     imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
+                                     mp1.s.rtt_nom_11,
+                                     mp1.s.rtt_nom_10,
+                                     mp1.s.rtt_nom_01,
+                                     mp1.s.rtt_nom_00);
+                       }
+
+                       ddr_init_seq(priv, rank_mask, if_num);
+
+                       // Try RANK outside RODT to rearrange the output...
+                       for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+                               if (!(rank_mask & (1 << rankx)))
+                                       continue;
+
+                               for (rodt_ctl = max_rodt_ctl;
+                                    rodt_ctl >= min_rodt_ctl; --rodt_ctl)
+                                       rodt_loop(priv, rankx, rl_score);
+                       }
+               }
+
+               /* Re-enable dynamic compensation settings. */
+               if (rl_comp_offs != 0) {
+                       cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
+
+                       cc2.cn78xx.ptune = 0;
+                       cc2.cn78xx.ntune = 0;
+                       cc2.cn78xx.byp = 0; /* Disable bypass mode */
+                       lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
+                       /* Read once */
+                       lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
+
+                       /* Read again */
+                       cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
+                       debug("DDR__PTUNE/DDR__NTUNE                         : %d/%d\n",
+                             cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune);
+
+                       ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
+                       /* Restore original setting */
+                       ctl.s.int_zqcs_dis = saved_int_zqcs_dis;
+                       lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
+               }
+
+               int override_compensation = 0;
+
+               s = lookup_env(priv, "ddr__ptune");
+               if (s)
+                       saved_ddr__ptune = strtoul(s, NULL, 0);
+
+               s = lookup_env(priv, "ddr__ntune");
+               if (s) {
+                       saved_ddr__ntune = strtoul(s, NULL, 0);
+                       override_compensation = 1;
+               }
+
+               if (override_compensation) {
+                       cc2.cn78xx.ptune = saved_ddr__ptune;
+                       cc2.cn78xx.ntune = saved_ddr__ntune;
+
+                       ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
+                       saved_int_zqcs_dis = ctl.s.int_zqcs_dis;
+                       /* Disable ZQCS while in bypass. */
+                       ctl.s.int_zqcs_dis = 1;
+                       lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
+
+                       cc2.cn78xx.byp = 1; /* Enable bypass mode */
+                       lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
+                       /* Read again */
+                       cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
+
+                       debug("DDR__PTUNE/DDR__NTUNE                         : %d/%d\n",
+                             cc2.cn78xx.ptune, cc2.cn78xx.ntune);
+               }
+
+               /* Evaluation block */
+               /* Still at initial value? */
+               int best_rodt_score = DEFAULT_BEST_RANK_SCORE;
+               int auto_rodt_ctl = 0;
+               int auto_rtt_nom  = 0;
+               int rodt_score;
+
+               rodt_row_skip_mask = 0;
+
+               // just add specific RODT rows to the skip mask for DDR4
+               // at this time...
+               if (ddr_type == DDR4_DRAM) {
+                       // skip RODT row 34 ohms for all DDR4 types
+                       rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_34_ohm);
+                       // skip RODT row 40 ohms for all DDR4 types
+                       rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_40_ohm);
+                       // For now, do not skip RODT row 40 or 48 ohm when
+                       // ddr_hertz is above 1075 MHz
+                       if (ddr_hertz > 1075000000) {
+                               // noskip RODT row 40 ohms
+                               rodt_row_skip_mask &=
+                                       ~(1 << ddr4_rodt_ctl_40_ohm);
+                               // noskip RODT row 48 ohms
+                               rodt_row_skip_mask &=
+                                       ~(1 << ddr4_rodt_ctl_48_ohm);
+                       }
+                       // For now, do not skip RODT row 48 ohm for 2Rx4
+                       // stacked die DIMMs
+                       if (is_stacked_die && num_ranks == 2 &&
+                           dram_width == 4) {
+                               // noskip RODT row 48 ohms
+                               rodt_row_skip_mask &=
+                                       ~(1 << ddr4_rodt_ctl_48_ohm);
+                       }
+                       // for now, leave all rows eligible when we have
+                       // mini-DIMMs...
+                       if (spd_dimm_type == 5 || spd_dimm_type == 6)
+                               rodt_row_skip_mask = 0;
+                       // for now, leave all rows eligible when we have
+                       // a 2-slot 1-rank config
+                       if (dimm_count == 2 && num_ranks == 1)
+                               rodt_row_skip_mask = 0;
+
+                       debug("Evaluating Read-Leveling Scoreboard for AUTO settings.\n");
+                       for (rtt_idx = min_rtt_nom_idx;
+                            rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
+                               rtt_nom = imp_val->rtt_nom_table[rtt_idx];
+
+                               for (rodt_ctl = max_rodt_ctl;
+                                    rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
+                                       rodt_score = 0;
+                                       for (rankx = 0; rankx < dimm_count * 4;
+                                            rankx++) {
+                                               if (!(rank_mask & (1 << rankx)))
+                                                       continue;
+
+                                               debug("rl_score[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score:%d\n",
+                                                     rtt_nom, rodt_ctl, rankx,
+                                                     rl_score[rtt_nom][rodt_ctl][rankx].score);
+                                               rodt_score +=
+                                                       rl_score[rtt_nom][rodt_ctl][rankx].score;
+                                       }
+                                       // FIXME: do we need to skip RODT rows
+                                       // here, like we do below in the
+                                       // by-RANK settings?
+
+                                       /*
+                                        * When using automatic ODT settings,
+                                        * use the ODT settings associated with
+                                        * the best score across all of the
+                                        * tested ODT combinations.  On a tie,
+                                        * prefer the higher-ohm RODT setting.
+                                        */
+
+                                       if (rodt_score < best_rodt_score ||
+                                           (rodt_score == best_rodt_score &&
+                                            (imp_val->rodt_ohms[rodt_ctl] >
+                                             imp_val->rodt_ohms[auto_rodt_ctl]))) {
+                                               debug("AUTO: new best score for rodt:%d (%d), new score:%d, previous score:%d\n",
+                                                     rodt_ctl,
+                                                     imp_val->rodt_ohms[rodt_ctl],
+                                                     rodt_score,
+                                                     best_rodt_score);
+                                               best_rodt_score = rodt_score;
+                                               auto_rodt_ctl   = rodt_ctl;
+                                               auto_rtt_nom    = rtt_nom;
+                                       }
+                               }
+                       }
+
+                       mp1.u64 = lmc_rd(priv,
+                                        CVMX_LMCX_MODEREG_PARAMS1(if_num));
+
+                       if (ddr_rtt_nom_auto) {
+                               /* Store the automatically set RTT_NOM value */
+                               if (dyn_rtt_nom_mask & 1)
+                                       mp1.s.rtt_nom_00 = auto_rtt_nom;
+                               if (dyn_rtt_nom_mask & 2)
+                                       mp1.s.rtt_nom_01 = auto_rtt_nom;
+                               if (dyn_rtt_nom_mask & 4)
+                                       mp1.s.rtt_nom_10 = auto_rtt_nom;
+                               if (dyn_rtt_nom_mask & 8)
+                                       mp1.s.rtt_nom_11 = auto_rtt_nom;
+                       } else {
+                               /*
+                                * restore the manual settings to the register
+                                */
+                               mp1.s.rtt_nom_00 = default_rtt_nom[0];
+                               mp1.s.rtt_nom_01 = default_rtt_nom[1];
+                               mp1.s.rtt_nom_10 = default_rtt_nom[2];
+                               mp1.s.rtt_nom_11 = default_rtt_nom[3];
+                       }
+
+                       lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num),
+                              mp1.u64);
+                       debug("RTT_NOM     %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
+                             imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
+                             imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
+                             imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
+                             imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
+                             mp1.s.rtt_nom_11,
+                             mp1.s.rtt_nom_10,
+                             mp1.s.rtt_nom_01,
+                             mp1.s.rtt_nom_00);
+
+                       debug("RTT_WR      %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
+                             imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)],
+                             imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)],
+                             imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)],
+                             imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)],
+                             extr_wr(mp1.u64, 3),
+                             extr_wr(mp1.u64, 2),
+                             extr_wr(mp1.u64, 1),
+                             extr_wr(mp1.u64, 0));
+
+                       debug("DIC         %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
+                             imp_val->dic_ohms[mp1.s.dic_11],
+                             imp_val->dic_ohms[mp1.s.dic_10],
+                             imp_val->dic_ohms[mp1.s.dic_01],
+                             imp_val->dic_ohms[mp1.s.dic_00],
+                             mp1.s.dic_11,
+                             mp1.s.dic_10,
+                             mp1.s.dic_01,
+                             mp1.s.dic_00);
+
+                       if (ddr_type == DDR4_DRAM) {
+                               union cvmx_lmcx_modereg_params2 mp2;
+                               /*
+                                * We must read the CSR, and not depend on
+                                * odt_config[odt_idx].odt_mask2, since we could
+                                * have overridden values with envvars.
+                                * NOTE: this corrects the printout, since the
+                                * CSR is not written with the old values...
+                                */
+                               mp2.u64 = lmc_rd(priv,
+                                                CVMX_LMCX_MODEREG_PARAMS2(if_num));
+
+                               debug("RTT_PARK    %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
+                                     imp_val->rtt_nom_ohms[mp2.s.rtt_park_11],
+                                     imp_val->rtt_nom_ohms[mp2.s.rtt_park_10],
+                                     imp_val->rtt_nom_ohms[mp2.s.rtt_park_01],
+                                     imp_val->rtt_nom_ohms[mp2.s.rtt_park_00],
+                                     mp2.s.rtt_park_11,
+                                     mp2.s.rtt_park_10,
+                                     mp2.s.rtt_park_01,
+                                     mp2.s.rtt_park_00);
+
+                               debug("%-45s :  0x%x,0x%x,0x%x,0x%x\n",
+                                     "VREF_RANGE",
+                                     mp2.s.vref_range_11,
+                                     mp2.s.vref_range_10,
+                                     mp2.s.vref_range_01,
+                                     mp2.s.vref_range_00);
+
+                               debug("%-45s :  0x%x,0x%x,0x%x,0x%x\n",
+                                     "VREF_VALUE",
+                                     mp2.s.vref_value_11,
+                                     mp2.s.vref_value_10,
+                                     mp2.s.vref_value_01,
+                                     mp2.s.vref_value_00);
+                       }
+
+                       cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
+                       if (ddr_rodt_ctl_auto) {
+                               cc2.cn78xx.rodt_ctl = auto_rodt_ctl;
+                       } else {
+                               // back to the original setting
+                               cc2.cn78xx.rodt_ctl = default_rodt_ctl;
+                       }
+                       lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
+                       cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
+                       debug("Read ODT_CTL                                  : 0x%x (%d ohms)\n",
+                             cc2.cn78xx.rodt_ctl,
+                             imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);
+
+                       /*
+                        * Use the delays associated with the best score for
+                        * each individual rank
+                        */
+                       debug("Evaluating Read-Leveling Scoreboard for per-RANK settings.\n");
+
+                       // this is the RANK MAJOR LOOP
+                       for (rankx = 0; rankx < dimm_count * 4; rankx++)
+                               rank_major_loop(priv, rankx, rl_score);
+               }  /* Evaluation block */
+       } /* while(rl_dbg_loops--) */
+
+       ctl.cn78xx.ddr2t = save_ddr2t;
+       lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
+       ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
+       /* Display final 2T value */
+       debug("DDR2T                                         : %6d\n",
+             ctl.cn78xx.ddr2t);
+
+       ddr_init_seq(priv, rank_mask, if_num);
+
+       for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+               u64 value;
+               int parameter_set = 0;
+
+               if (!(rank_mask & (1 << rankx)))
+                       continue;
+
+               rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
+                                                                 if_num));
+
+               for (i = 0; i < 9; ++i) {
+                       s = lookup_env(priv, "ddr%d_rlevel_rank%d_byte%d",
+                                      if_num, rankx, i);
+                       if (s) {
+                               parameter_set |= 1;
+                               value = simple_strtoul(s, NULL, 0);
+
+                               upd_rl_rank(&rl_rank, i, value);
+                       }
+               }
+
+               s = lookup_env_ull(priv, "ddr%d_rlevel_rank%d", if_num, rankx);
+               if (s) {
+                       parameter_set |= 1;
+                       value = simple_strtoull(s, NULL, 0);
+                       rl_rank.u64 = value;
+               }
+
+               if (parameter_set) {
+                       lmc_wr(priv,
+                              CVMX_LMCX_RLEVEL_RANKX(rankx, if_num),
+                              rl_rank.u64);
+                       rl_rank.u64 = lmc_rd(priv,
+                                            CVMX_LMCX_RLEVEL_RANKX(rankx,
+                                                                   if_num));
+                       display_rl(if_num, rl_rank, rankx);
+               }
+       }
+}
+
+int init_octeon3_ddr3_interface(struct ddr_priv *priv,
+                               struct ddr_conf *_ddr_conf, u32 _ddr_hertz,
+                               u32 cpu_hertz, u32 ddr_ref_hertz, int _if_num,
+                               u32 _if_mask)
+{
+       union cvmx_lmcx_control ctrl;
+       int ret;
+       char *s;
+       int i;
+
+       if_num = _if_num;
+       ddr_hertz = _ddr_hertz;
+       ddr_conf = _ddr_conf;
+       if_mask = _if_mask;
+       odt_1rank_config = ddr_conf->odt_1rank_config;
+       odt_2rank_config = ddr_conf->odt_2rank_config;
+       odt_4rank_config = ddr_conf->odt_4rank_config;
+       dimm_config_table = ddr_conf->dimm_config_table;
+       c_cfg = &ddr_conf->custom_lmc_config;
+
+       /*
+        * Compute clock rates to the nearest picosecond.
+        */
+       tclk_psecs = hertz_to_psecs(ddr_hertz); /* Clock in psecs */
+       eclk_psecs = hertz_to_psecs(cpu_hertz); /* Clock in psecs */
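+       /* e.g. a 1200 MHz DDR clock gives tclk_psecs = 10^12 / 1.2e9 ~= 833 ps */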
+
+       dimm_count = 0;
+       /* Accumulate and report all the errors before giving up */
+       fatal_error = 0;
+
+       /* Flag that indicates safe DDR settings should be used */
+       safe_ddr_flag = 0;
+       if_64b = 1;             /* Octeon II Default: 64bit interface width */
+       mem_size_mbytes = 0;
+       bank_bits = 0;
+       column_bits_start = 1;
+       use_ecc = 1;
+       min_cas_latency = 0, max_cas_latency = 0, override_cas_latency = 0;
+       spd_package = 0;
+       spd_rawcard = 0;
+       spd_rawcard_aorb = 0;
+       spd_rdimm_registers = 0;
+       is_stacked_die = 0;
+       is_3ds_dimm = 0;        // 3DS
+       lranks_per_prank = 1;   // 3DS: logical ranks per package rank
+       lranks_bits = 0;        // 3DS: logical ranks bits
+       die_capacity = 0;       // in Mbits; only used for 3DS
+
+       wl_mask_err = 0;
+       dyn_rtt_nom_mask = 0;
+       ddr_disable_chip_reset = 1;
+       match_wl_rtt_nom = 0;
+
+       internal_retries = 0;
+
+       disable_deskew_training = 0;
+       restart_if_dsk_incomplete = 0;
+       last_lane = ((if_64b) ? 8 : 4) + use_ecc;
+
+       disable_sequential_delay_check = 0;
+       wl_print = WLEVEL_PRINTALL_DEFAULT;
+
+       enable_by_rank_init = 1;        // FIXME: default by-rank ON
+       saved_rank_mask = 0;
+
+       node = 0;
+
+       memset(hwl_alts, 0, sizeof(hwl_alts));
+
+       /*
+        * Initialize these to shut up the compiler. They are configured
+        * and used only for DDR4
+        */
+       ddr4_trrd_lmin = 6000;
+       ddr4_tccd_lmin = 6000;
+
+       debug("\nInitializing node %d DDR interface %d, DDR Clock %d, DDR Reference Clock %d, CPUID 0x%08x\n",
+             node, if_num, ddr_hertz, ddr_ref_hertz, read_c0_prid());
+
+       if (dimm_config_table[0].spd_addrs[0] == 0 &&
+           !dimm_config_table[0].spd_ptrs[0]) {
+               printf("ERROR: No dimms specified in the dimm_config_table.\n");
+               return -1;
+       }
+
+       // allow some overrides to be done
+
+       // this one controls several things related to DIMM geometry: HWL and RL
+       disable_sequential_delay_check = c_cfg->disable_sequential_delay_check;
+       s = lookup_env(priv, "ddr_disable_sequential_delay_check");
+       if (s)
+               disable_sequential_delay_check = strtoul(s, NULL, 0);
+
+       // this one controls whether chip RESET is done, or LMC init restarted
+       // from step 6.9.6
+       s = lookup_env(priv, "ddr_disable_chip_reset");
+       if (s)
+               ddr_disable_chip_reset = !!strtoul(s, NULL, 0);
+
+       // this one controls whether Deskew Training is performed
+       s = lookup_env(priv, "ddr_disable_deskew_training");
+       if (s)
+               disable_deskew_training = !!strtoul(s, NULL, 0);
+
+       if (ddr_verbose(priv)) {
+               printf("DDR SPD Table:");
+               for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) {
+                       if (dimm_config_table[didx].spd_addrs[0] == 0)
+                               break;
+
+                       printf(" --ddr%dspd=0x%02x", if_num,
+                              dimm_config_table[didx].spd_addrs[0]);
+                       if (dimm_config_table[didx].spd_addrs[1] != 0)
+                               printf(",0x%02x",
+                                      dimm_config_table[didx].spd_addrs[1]);
+               }
+               printf("\n");
+       }
+
+       /*
+        * Walk the DRAM Socket Configuration Table to see what is installed.
+        */
+       for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) {
+               /* Check for lower DIMM socket populated */
+               if (validate_dimm(priv, &dimm_config_table[didx], 0)) {
+                       if (ddr_verbose(priv))
+                               report_dimm(&dimm_config_table[didx], 0,
+                                           dimm_count, if_num);
+                       ++dimm_count;
+               } else {
+                       break;
+               }               /* Finished when there is no lower DIMM */
+       }
+
+       initialize_ddr_clock(priv, ddr_conf, cpu_hertz, ddr_hertz,
+                            ddr_ref_hertz, if_num, if_mask);
+
+       if (!odt_1rank_config)
+               odt_1rank_config = disable_odt_config;
+       if (!odt_2rank_config)
+               odt_2rank_config = disable_odt_config;
+       if (!odt_4rank_config)
+               odt_4rank_config = disable_odt_config;
+
+       s = env_get("ddr_safe");
+       if (s) {
+               safe_ddr_flag = !!simple_strtoul(s, NULL, 0);
+               printf("Parameter found in environment. ddr_safe = %d\n",
+                      safe_ddr_flag);
+       }
+
+       if (dimm_count == 0) {
+               printf("ERROR: DIMM 0 not detected.\n");
+               return (-1);
+       }
+
+       if (c_cfg->mode32b)
+               if_64b = 0;
+
+       s = lookup_env(priv, "if_64b");
+       if (s)
+               if_64b = !!simple_strtoul(s, NULL, 0);
+
+       if (if_64b == 1) {
+               if (octeon_is_cpuid(OCTEON_CN70XX)) {
+                       printf("64-bit interface width is not supported for this Octeon model\n");
+                       ++fatal_error;
+               }
+       }
+
+       /* ddr_type only indicates DDR4 or DDR3 */
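+       /* per the SPD encoding, key byte 0x0C identifies DDR4 SDRAM;
+        * anything else is treated as DDR3 here
+        */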
+       ddr_type = (read_spd(&dimm_config_table[0], 0,
+                            DDR4_SPD_KEY_BYTE_DEVICE_TYPE) == 0x0C) ? 4 : 3;
+       debug("DRAM Device Type: DDR%d\n", ddr_type);
+
+       if (ddr_type == DDR4_DRAM) {
+               int spd_module_type;
+               int asymmetric;
+               const char *signal_load[4] = { "", "MLS", "3DS", "RSV" };
+
+               imp_val = &ddr4_impedence_val;
+
+               spd_addr =
+                   read_spd(&dimm_config_table[0], 0,
+                            DDR4_SPD_ADDRESSING_ROW_COL_BITS);
+               spd_org =
+                   read_spd(&dimm_config_table[0], 0,
+                            DDR4_SPD_MODULE_ORGANIZATION);
+               spd_banks =
+                   0xFF & read_spd(&dimm_config_table[0], 0,
+                                   DDR4_SPD_DENSITY_BANKS);
+
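+               /* DDR4 SPD density/banks byte: bits [5:4] encode the bank
+                * address bits (value + 2) and bits [7:6] the bank group
+                * bits; the sum below combines them into bank_bits
+                */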
+               bank_bits =
+                   (2 + ((spd_banks >> 4) & 0x3)) + ((spd_banks >> 6) & 0x3);
+               /* Controller can only address 4 bits. */
+               bank_bits = min((int)bank_bits, 4);
+
+               spd_package =
+                   0xFF & read_spd(&dimm_config_table[0], 0,
+                                   DDR4_SPD_PACKAGE_TYPE);
+               if (spd_package & 0x80) {       // non-monolithic device
+                       is_stacked_die = ((spd_package & 0x73) == 0x11);
+                       debug("DDR4: Package Type 0x%02x (%s), %d die\n",
+                             spd_package, signal_load[(spd_package & 3)],
+                             ((spd_package >> 4) & 7) + 1);
+                       is_3ds_dimm = ((spd_package & 3) == 2); // is it 3DS?
+                       if (is_3ds_dimm) {      // is it 3DS?
+                               lranks_per_prank = ((spd_package >> 4) & 7) + 1;
+                               // FIXME: should make sure it is only 2H or 4H
+                               // or 8H?
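+                               // 2H/4H/8H stacks need 1/2/3 address bits;
+                               // the >>1 plus the fixup below gives exactly that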
+                               lranks_bits = lranks_per_prank >> 1;
+                               if (lranks_bits == 4)
+                                       lranks_bits = 3;
+                       }
+               } else if (spd_package != 0) {
+                       // FIXME: print non-zero monolithic device definition
+                       debug("DDR4: Package Type MONOLITHIC: %d die, signal load %d\n",
+                             ((spd_package >> 4) & 7) + 1, (spd_package & 3));
+               }
+
+               asymmetric = (spd_org >> 6) & 1;
+               if (asymmetric) {
+                       int spd_secondary_pkg =
+                           read_spd(&dimm_config_table[0], 0,
+                                    DDR4_SPD_SECONDARY_PACKAGE_TYPE);
+                       debug("DDR4: Module Organization: ASYMMETRICAL: Secondary Package Type 0x%02x\n",
+                             spd_secondary_pkg);
+               } else {
+                       u64 bus_width =
+                               8 << (0x07 &
+                               read_spd(&dimm_config_table[0], 0,
+                                        DDR4_SPD_MODULE_MEMORY_BUS_WIDTH));
+                       u64 ddr_width = 4 << ((spd_org >> 0) & 0x7);
+                       u64 module_cap;
+                       int shift = (spd_banks & 0x0F);
+
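+                       /* SPD density codes 0-7 are powers of two from
+                        * 256 Mbit up to 32 Gbit; codes 8 and 9 are the
+                        * non-power-of-two 12 Gbit and 24 Gbit densities
+                        */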
+                       die_capacity = (shift < 8) ? (256UL << shift) :
+                               ((12UL << (shift & 1)) << 10);
+                       debug("DDR4: Module Organization: SYMMETRICAL: capacity per die %d %cbit\n",
+                             (die_capacity > 512) ? (die_capacity >> 10) :
+                             die_capacity, (die_capacity > 512) ? 'G' : 'M');
+                       module_cap = ((u64)die_capacity << 20) / 8UL *
+                               bus_width / ddr_width *
+                               (1UL + ((spd_org >> 3) & 0x7));
+
+                       // is it 3DS?
+                       if (is_3ds_dimm) {
+                               module_cap *= (u64)(((spd_package >> 4) & 7) +
+                                                   1);
+                       }
+                       debug("DDR4: Module Organization: SYMMETRICAL: capacity per module %lld GB\n",
+                             module_cap >> 30);
+               }
+
+               spd_rawcard =
+                   0xFF & read_spd(&dimm_config_table[0], 0,
+                                   DDR4_SPD_REFERENCE_RAW_CARD);
+               debug("DDR4: Reference Raw Card 0x%02x\n", spd_rawcard);
+
+               spd_module_type =
+                   read_spd(&dimm_config_table[0], 0,
+                            DDR4_SPD_KEY_BYTE_MODULE_TYPE);
+               if (spd_module_type & 0x80) {   // HYBRID module
+                       debug("DDR4: HYBRID module, type %s\n",
+                             ((spd_module_type & 0x70) ==
+                              0x10) ? "NVDIMM" : "UNKNOWN");
+               }
+               spd_thermal_sensor =
+                   read_spd(&dimm_config_table[0], 0,
+                            DDR4_SPD_MODULE_THERMAL_SENSOR);
+               spd_dimm_type = spd_module_type & 0x0F;
+               spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) ||
+                       (spd_dimm_type == 8);
+               if (spd_rdimm) {
+                       u16 spd_mfgr_id, spd_register_rev, spd_mod_attr;
+                       static const u16 manu_ids[4] = {
+                               0xb380, 0x3286, 0x9780, 0xb304
+                       };
+                       static const char *manu_names[4] = {
+                               "XXX", "XXXXXXX", "XX", "XXXXX"
+                       };
+                       int mc;
+
+                       spd_mfgr_id =
+                           (0xFFU &
+                            read_spd(&dimm_config_table[0], 0,
+                                     DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB)) |
+                           ((0xFFU &
+                             read_spd(&dimm_config_table[0], 0,
+                                      DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB))
+                            << 8);
+                       spd_register_rev =
+                           0xFFU & read_spd(&dimm_config_table[0], 0,
+                                            DDR4_SPD_REGISTER_REVISION_NUMBER);
+                       for (mc = 0; mc < 4; mc++)
+                               if (manu_ids[mc] == spd_mfgr_id)
+                                       break;
+
+                       debug("DDR4: RDIMM Register Manufacturer ID: %s, Revision: 0x%02x\n",
+                             (mc >= 4) ? "UNKNOWN" : manu_names[mc],
+                             spd_register_rev);
+
+                       // RAWCARD A or B must be bit 7=0 and bits 4-0
+                       // either 00000(A) or 00001(B)
+                       spd_rawcard_aorb = ((spd_rawcard & 0x9fUL) <= 1);
+                       // RDIMM Module Attributes
+                       spd_mod_attr =
+                           0xFFU & read_spd(&dimm_config_table[0], 0,
+                                       DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE);
+                       spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1);
+                       debug("DDR4: RDIMM Module Attributes (0x%02x): Register Type DDR4RCD%02d, DRAM rows %d, Registers %d\n",
+                             spd_mod_attr, (spd_mod_attr >> 4) + 1,
+                             ((1 << ((spd_mod_attr >> 2) & 3)) >> 1),
+                             spd_rdimm_registers);
+               }
+               dimm_type_name = ddr4_dimm_types[spd_dimm_type];
+       } else {                /* if (ddr_type == DDR4_DRAM) */
+               const char *signal_load[4] = { "UNK", "MLS", "SLS", "RSV" };
+
+               imp_val = &ddr3_impedence_val;
+
+               spd_addr =
+                   read_spd(&dimm_config_table[0], 0,
+                            DDR3_SPD_ADDRESSING_ROW_COL_BITS);
+               spd_org =
+                   read_spd(&dimm_config_table[0], 0,
+                            DDR3_SPD_MODULE_ORGANIZATION);
+               spd_banks =
+                   read_spd(&dimm_config_table[0], 0,
+                            DDR3_SPD_DENSITY_BANKS) & 0xff;
+
+               bank_bits = 3 + ((spd_banks >> 4) & 0x7);
+               /* Controller can only address 3 bits. */
+               bank_bits = min((int)bank_bits, 3);
+               spd_dimm_type =
+                   0x0f & read_spd(&dimm_config_table[0], 0,
+                                   DDR3_SPD_KEY_BYTE_MODULE_TYPE);
+               spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) ||
+                       (spd_dimm_type == 9);
+
+               spd_package =
+                   0xFF & read_spd(&dimm_config_table[0], 0,
+                                   DDR3_SPD_SDRAM_DEVICE_TYPE);
+               if (spd_package & 0x80) {       // non-standard device
+                       debug("DDR3: Device Type 0x%02x (%s), %d die\n",
+                             spd_package, signal_load[(spd_package & 3)],
+                             ((1 << ((spd_package >> 4) & 7)) >> 1));
+               } else if (spd_package != 0) {
+                       // FIXME: print non-zero monolithic device definition
+                       debug("DDR3: Device Type MONOLITHIC: %d die, signal load %d\n",
+                             ((1 << ((spd_package >> 4) & 7)) >> 1),
+                             (spd_package & 3));
+               }
+
+               spd_rawcard =
+                   0xFF & read_spd(&dimm_config_table[0], 0,
+                                   DDR3_SPD_REFERENCE_RAW_CARD);
+               debug("DDR3: Reference Raw Card 0x%02x\n", spd_rawcard);
+               spd_thermal_sensor =
+                   read_spd(&dimm_config_table[0], 0,
+                            DDR3_SPD_MODULE_THERMAL_SENSOR);
+
+               if (spd_rdimm) {
+                       int spd_mfgr_id, spd_register_rev, spd_mod_attr;
+
+                       spd_mfgr_id =
+                           (0xFFU &
+                            read_spd(&dimm_config_table[0], 0,
+                                     DDR3_SPD_REGISTER_MANUFACTURER_ID_LSB)) |
+                           ((0xFFU &
+                             read_spd(&dimm_config_table[0], 0,
+                                      DDR3_SPD_REGISTER_MANUFACTURER_ID_MSB))
+                            << 8);
+                       spd_register_rev =
+                           0xFFU & read_spd(&dimm_config_table[0], 0,
+                                            DDR3_SPD_REGISTER_REVISION_NUMBER);
+                       debug("DDR3: RDIMM Register Manufacturer ID 0x%x Revision 0x%02x\n",
+                             spd_mfgr_id, spd_register_rev);
+                       // Module Attributes
+                       spd_mod_attr =
+                           0xFFU & read_spd(&dimm_config_table[0], 0,
+                                            DDR3_SPD_ADDRESS_MAPPING);
+                       spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1);
+                       debug("DDR3: RDIMM Module Attributes (0x%02x): DRAM rows %d, Registers %d\n",
+                             spd_mod_attr,
+                             ((1 << ((spd_mod_attr >> 2) & 3)) >> 1),
+                             spd_rdimm_registers);
+               }
+               dimm_type_name = ddr3_dimm_types[spd_dimm_type];
+       }
+
+       if (spd_thermal_sensor & 0x80) {
+               debug("DDR%d: SPD: Thermal Sensor PRESENT\n",
+                     (ddr_type == DDR4_DRAM) ? 4 : 3);
+       }
+
+       debug("spd_addr        : %#06x\n", spd_addr);
+       debug("spd_org         : %#06x\n", spd_org);
+       debug("spd_banks       : %#06x\n", spd_banks);
+
+       row_bits = 12 + ((spd_addr >> 3) & 0x7);
+       col_bits = 9 + ((spd_addr >> 0) & 0x7);
+
+       num_ranks = 1 + ((spd_org >> 3) & 0x7);
+       dram_width = 4 << ((spd_org >> 0) & 0x7);
+       num_banks = 1 << bank_bits;
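+
+       /*
+        * Worked example of the SPD geometry decode above (illustrative
+        * values, not read from any particular DIMM): spd_addr = 0x21 and
+        * spd_org = 0x09 decode as
+        *   row_bits   = 12 + ((0x21 >> 3) & 0x7)  = 16
+        *   col_bits   =  9 + ((0x21 >> 0) & 0x7)  = 10
+        *   num_ranks  =  1 + ((0x09 >> 3) & 0x7)  =  2
+        *   dram_width =  4 << ((0x09 >> 0) & 0x7) =  8
+        * i.e. a dual-rank module built from x8 devices with 16 row and
+        * 10 column address bits.
+        */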
+
+       s = lookup_env(priv, "ddr_num_ranks");
+       if (s)
+               num_ranks = simple_strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_enable_by_rank_init");
+       if (s)
+               enable_by_rank_init = !!simple_strtoul(s, NULL, 0);
+
+       // FIXME: for now, we can only handle a DDR4 2rank-1slot config
+       // FIXME: also, by-rank init does not work correctly if 32-bit mode...
+       if (enable_by_rank_init && (ddr_type != DDR4_DRAM ||
+                                   dimm_count != 1 || if_64b != 1 ||
+                                   num_ranks != 2))
+               enable_by_rank_init = 0;
+
+       if (enable_by_rank_init) {
+               struct dimm_odt_config *odt_config;
+               union cvmx_lmcx_modereg_params1 mp1;
+               union cvmx_lmcx_modereg_params2 modereg_params2;
+               int by_rank_rodt, by_rank_wr, by_rank_park;
+
+               // Do ODT settings changes which work best for 2R-1S configs
+               debug("DDR4: 2R-1S special BY-RANK init ODT settings updated\n");
+
+               // setup for modifying config table values - 2 ranks and 1 DIMM
+               odt_config =
+                   (struct dimm_odt_config *)&ddr_conf->odt_2rank_config[0];
+
+               // original was 80, first try was 60
+               by_rank_rodt = ddr4_rodt_ctl_48_ohm;
+               s = lookup_env(priv, "ddr_by_rank_rodt");
+               if (s)
+                       by_rank_rodt = strtoul(s, NULL, 0);
+
+               odt_config->qs_dic = /*RODT_CTL */ by_rank_rodt;
+
+               // this is for MODEREG_PARAMS1 fields
+               // fetch the original settings
+               mp1.u64 = odt_config->modereg_params1.u64;
+
+               by_rank_wr = ddr4_rttwr_80ohm;  // originals were 240
+               s = lookup_env(priv, "ddr_by_rank_wr");
+               if (s)
+                       by_rank_wr = simple_strtoul(s, NULL, 0);
+
+               // change specific settings here...
+               insrt_wr(&mp1.u64, /*rank */ 00, by_rank_wr);
+               insrt_wr(&mp1.u64, /*rank */ 01, by_rank_wr);
+
+               // save final settings
+               odt_config->modereg_params1.u64 = mp1.u64;
+
+               // this is for MODEREG_PARAMS2 fields
+               // fetch the original settings
+               modereg_params2.u64 = odt_config->modereg_params2.u64;
+
+               by_rank_park = ddr4_rttpark_none;       // originals were 120
+               s = lookup_env(priv, "ddr_by_rank_park");
+               if (s)
+                       by_rank_park = simple_strtoul(s, NULL, 0);
+
+               // change specific settings here...
+               modereg_params2.s.rtt_park_00 = by_rank_park;
+               modereg_params2.s.rtt_park_01 = by_rank_park;
+
+               // save final settings
+               odt_config->modereg_params2.u64 = modereg_params2.u64;
+       }
+
+       /*
+        * FIX
+        * Check that values are within some theoretical limits.
+        * col_bits(min) = row_lsb(min) - bank_bits(max) - bus_bits(max) =
+        *   14 - 3 - 4 = 7
+        * col_bits(max) = row_lsb(max) - bank_bits(min) - bus_bits(min) =
+        *   18 - 2 - 3 = 13
+        */
+       if (col_bits > 13 || col_bits < 7) {
+               printf("Unsupported number of Col Bits: %d\n", col_bits);
+               ++fatal_error;
+       }
+
+       /*
+        * FIX
+        * Check that values are within some theoretical limits.
+        * row_bits(min) = pbank_lsb(min) - row_lsb(max) - rank_bits =
+        *   26 - 18 - 1 = 7
+        * row_bits(max) = pbank_lsb(max) - row_lsb(min) - rank_bits =
+        *   33 - 14 - 1 = 18
+        */
+       if (row_bits > 18 || row_bits < 7) {
+               printf("Unsupported number of Row Bits: %d\n", row_bits);
+               ++fatal_error;
+       }
+
+       s = lookup_env(priv, "ddr_rdimm_ena");
+       if (s)
+               spd_rdimm = !!simple_strtoul(s, NULL, 0);
+
+       wl_loops = WLEVEL_LOOPS_DEFAULT;
+       // accept generic or interface-specific override
+       s = lookup_env(priv, "ddr_wlevel_loops");
+       if (!s)
+               s = lookup_env(priv, "ddr%d_wlevel_loops", if_num);
+
+       if (s)
+               wl_loops = strtoul(s, NULL, 0);
+
+       s = lookup_env(priv, "ddr_ranks");
+       if (s)
+               num_ranks = simple_strtoul(s, NULL, 0);
+
+       bunk_enable = (num_ranks > 1);
+
+       if (octeon_is_cpuid(OCTEON_CN7XXX))
+               column_bits_start = 3;
+       else
+               printf("ERROR: Unsupported Octeon model: 0x%x\n",
+                      read_c0_prid());
+
+       row_lsb = column_bits_start + col_bits + bank_bits - (!if_64b);
+       debug("row_lsb = column_bits_start + col_bits + bank_bits = %d\n",
+             row_lsb);
+
+       pbank_lsb = row_lsb + row_bits + bunk_enable;
+       debug("pbank_lsb = row_lsb + row_bits + bunk_enable = %d\n", pbank_lsb);
+
+       if (lranks_per_prank > 1) {
+               pbank_lsb = row_lsb + row_bits + lranks_bits + bunk_enable;
+               debug("DDR4: 3DS: pbank_lsb = (%d row_lsb) + (%d row_bits) + (%d lranks_bits) + (%d bunk_enable) = %d\n",
+                     row_lsb, row_bits, lranks_bits, bunk_enable, pbank_lsb);
+       }
+
+       mem_size_mbytes = dimm_count * ((1ull << pbank_lsb) >> 20);
+       if (num_ranks == 4) {
+               /*
+                * Quad rank dimm capacity is equivalent to two dual-rank
+                * dimms.
+                */
+               mem_size_mbytes *= 2;
+       }
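+
+       /*
+        * Sizing example (assumed values, continuing the x8 dual-rank
+        * example above): with column_bits_start = 3, col_bits = 10,
+        * bank_bits = 4, row_bits = 16 and a 64-bit interface,
+        *   row_lsb   = 3 + 10 + 4               = 17
+        *   pbank_lsb = 17 + 16 + 1              = 34
+        *   mem_size  = 1 * ((1ull << 34) >> 20) = 16384 MB per DIMM
+        * The quad-rank case above doubles this figure because a quad-rank
+        * DIMM holds twice the capacity selected by pbank_lsb.
+        */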
+
+       /*
+        * Mask with 1 bits set for each active rank, allowing 2 bits
+        * per dimm. This makes later calculations simpler, as a variety
+        * of CSRs use this layout. This init needs to be updated for dual
+        * configs (i.e. non-identical DIMMs).
+        *
+        * Bit 0 = dimm0, rank 0
+        * Bit 1 = dimm0, rank 1
+        * Bit 2 = dimm1, rank 0
+        * Bit 3 = dimm1, rank 1
+        * ...
+        */
+       rank_mask = 0x1;
+       if (num_ranks > 1)
+               rank_mask = 0x3;
+       if (num_ranks > 2)
+               rank_mask = 0xf;
+
+       for (i = 1; i < dimm_count; i++)
+               rank_mask |= ((rank_mask & 0x3) << (2 * i));
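+
+       /*
+        * Example (assumed population): two dual-rank DIMMs start with
+        * rank_mask = 0x3 (ranks 0/1 of DIMM 0); the loop above then ORs
+        * in (0x3 << 2), giving rank_mask = 0xf (bit 0 = dimm0/rank0 ...
+        * bit 3 = dimm1/rank1).  A single single-rank DIMM simply leaves
+        * rank_mask = 0x1.
+        */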
+
+       /*
+        * If we are booting from RAM, the DRAM controller is
+        * already set up.  Just return the memory size
+        */
+       if (priv->flags & FLAG_RAM_RESIDENT) {
+               debug("Ram Boot: Skipping LMC config\n");
+               return mem_size_mbytes;
+       }
+
+       if (ddr_type == DDR4_DRAM) {
+               spd_ecc =
+                   !!(read_spd
+                      (&dimm_config_table[0], 0,
+                       DDR4_SPD_MODULE_MEMORY_BUS_WIDTH) & 8);
+       } else {
+               spd_ecc =
+                   !!(read_spd
+                      (&dimm_config_table[0], 0,
+                       DDR3_SPD_MEMORY_BUS_WIDTH) & 8);
+       }
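+
+       /*
+        * Bit 3 of the SPD bus-width byte flags the 8-bit ECC extension.
+        * For example (illustrative value), a byte of 0x0b decodes as a
+        * 64-bit primary bus (bits [2:0] = 3) plus the ECC extension
+        * (bit 3 set), so spd_ecc = 1; a plain 64-bit module reports 0x03.
+        */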
+
+       char rank_spec[8];
+
+       printable_rank_spec(rank_spec, num_ranks, dram_width, spd_package);
+       debug("Summary: %d %s%s %s %s, row bits=%d, col bits=%d, bank bits=%d\n",
+             dimm_count, dimm_type_name, (dimm_count > 1) ? "s" : "",
+             rank_spec,
+             (spd_ecc) ? "ECC" : "non-ECC", row_bits, col_bits, bank_bits);
+
+       if (ddr_type == DDR4_DRAM) {
+               spd_cas_latency =
+                   ((0xff &
+                     read_spd(&dimm_config_table[0], 0,
+                              DDR4_SPD_CAS_LATENCIES_BYTE0)) << 0);
+               spd_cas_latency |=
+                   ((0xff &
+                     read_spd(&dimm_config_table[0], 0,
+                              DDR4_SPD_CAS_LATENCIES_BYTE1)) << 8);
+               spd_cas_latency |=
+                   ((0xff &
+                     read_spd(&dimm_config_table[0], 0,
+                              DDR4_SPD_CAS_LATENCIES_BYTE2)) << 16);
+               spd_cas_latency |=
+                   ((0xff &
+                     read_spd(&dimm_config_table[0], 0,
+                              DDR4_SPD_CAS_LATENCIES_BYTE3)) << 24);
+       } else {
+               spd_cas_latency =
+                   0xff & read_spd(&dimm_config_table[0], 0,
+                                   DDR3_SPD_CAS_LATENCIES_LSB);
+               spd_cas_latency |=
+                   ((0xff &
+                     read_spd(&dimm_config_table[0], 0,
+                              DDR3_SPD_CAS_LATENCIES_MSB)) << 8);
+       }
+       debug("spd_cas_latency : %#06x\n", spd_cas_latency);
+
+       if (ddr_type == DDR4_DRAM) {
+               /*
+                * No other values for DDR4 MTB and FTB are specified at the
+                * current time so don't bother reading them. Can't speculate
+                * how new values will be represented.
+                */
+               int spdmtb = 125;
+               int spdftb = 1;
+
+               taamin = spdmtb * read_spd(&dimm_config_table[0], 0,
+                                          DDR4_SPD_MIN_CAS_LATENCY_TAAMIN) +
+                        spdftb * (signed char)read_spd(&dimm_config_table[0],
+                        0, DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN);
+
+               ddr4_tckavgmin = spdmtb * read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN) +
+                       spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN);
+
+               ddr4_tckavgmax = spdmtb * read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX) +
+                       spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX);
+
+               ddr4_trdcmin = spdmtb * read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN) +
+                       spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN);
+
+               ddr4_trpmin = spdmtb * read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN) +
+                       spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN);
+
+               ddr4_trasmin = spdmtb *
+                       (((read_spd
+                          (&dimm_config_table[0], 0,
+                           DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8) +
+                        (read_spd
+                         (&dimm_config_table[0], 0,
+                          DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN) & 0xff));
+
+               ddr4_trcmin = spdmtb *
+                       ((((read_spd
+                           (&dimm_config_table[0], 0,
+                            DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) >> 4) & 0xf) <<
+                         8) + (read_spd
+                               (&dimm_config_table[0], 0,
+                                DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN) &
+                               0xff))
+                       + spdftb * (signed char)read_spd(&dimm_config_table[0],
+                                                        0,
+                       DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN);
+
+               ddr4_trfc1min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN) & 0xff) <<
+                       8) + (read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN) & 0xff));
+
+               ddr4_trfc2min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN) & 0xff) <<
+                       8) + (read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN) & 0xff));
+
+               ddr4_trfc4min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN) & 0xff) <<
+                       8) + (read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN) & 0xff));
+
+               ddr4_tfawmin = spdmtb * (((read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN) & 0xf) <<
+                       8) + (read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN) & 0xff));
+
+               ddr4_trrd_smin = spdmtb * read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN) +
+                       spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN);
+
+               ddr4_trrd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN) +
+                       spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN);
+
+               ddr4_tccd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN) +
+                       spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN);
+
+               debug("%-45s : %6d ps\n", "Medium Timebase (MTB)", spdmtb);
+               debug("%-45s : %6d ps\n", "Fine Timebase   (FTB)", spdftb);
+
+               debug("%-45s : %6d ps (%ld MT/s)\n",
+                     "SDRAM Minimum Cycle Time (tCKAVGmin)", ddr4_tckavgmin,
+                     pretty_psecs_to_mts(ddr4_tckavgmin));
+               debug("%-45s : %6d ps\n",
+                     "SDRAM Maximum Cycle Time (tCKAVGmax)", ddr4_tckavgmax);
+               debug("%-45s : %6d ps\n", "Minimum CAS Latency Time (taamin)",
+                     taamin);
+               debug("%-45s : %6d ps\n",
+                     "Minimum RAS to CAS Delay Time (tRCDmin)", ddr4_trdcmin);
+               debug("%-45s : %6d ps\n",
+                     "Minimum Row Precharge Delay Time (tRPmin)", ddr4_trpmin);
+               debug("%-45s : %6d ps\n",
+                     "Minimum Active to Precharge Delay (tRASmin)",
+                     ddr4_trasmin);
+               debug("%-45s : %6d ps\n",
+                     "Minimum Active to Active/Refr. Delay (tRCmin)",
+                     ddr4_trcmin);
+               debug("%-45s : %6d ps\n",
+                     "Minimum Refresh Recovery Delay (tRFC1min)",
+                     ddr4_trfc1min);
+               debug("%-45s : %6d ps\n",
+                     "Minimum Refresh Recovery Delay (tRFC2min)",
+                     ddr4_trfc2min);
+               debug("%-45s : %6d ps\n",
+                     "Minimum Refresh Recovery Delay (tRFC4min)",
+                     ddr4_trfc4min);
+               debug("%-45s : %6d ps\n",
+                     "Minimum Four Activate Window Time (tFAWmin)",
+                     ddr4_tfawmin);
+               debug("%-45s : %6d ps\n",
+                     "Minimum Act. to Act. Delay (tRRD_Smin)", ddr4_trrd_smin);
+               debug("%-45s : %6d ps\n",
+                     "Minimum Act. to Act. Delay (tRRD_Lmin)", ddr4_trrd_lmin);
+               debug("%-45s : %6d ps\n",
+                     "Minimum CAS to CAS Delay Time (tCCD_Lmin)",
+                     ddr4_tccd_lmin);
+
+#define DDR4_TWR 15000
+#define DDR4_TWTR_S 2500
+
+               tckmin = ddr4_tckavgmin;
+               twr = DDR4_TWR;
+               trcd = ddr4_trdcmin;
+               trrd = ddr4_trrd_smin;
+               trp = ddr4_trpmin;
+               tras = ddr4_trasmin;
+               trc = ddr4_trcmin;
+               trfc = ddr4_trfc1min;
+               twtr = DDR4_TWTR_S;
+               tfaw = ddr4_tfawmin;
+
+               if (spd_rdimm) {
+                       spd_addr_mirror = read_spd(&dimm_config_table[0], 0,
+                       DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM) &
+                       0x1;
+               } else {
+                       spd_addr_mirror = read_spd(&dimm_config_table[0], 0,
+                               DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE) & 0x1;
+               }
+               debug("spd_addr_mirror : %#06x\n", spd_addr_mirror);
+       } else {
+               spd_mtb_dividend =
+                   0xff & read_spd(&dimm_config_table[0], 0,
+                                   DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND);
+               spd_mtb_divisor =
+                   0xff & read_spd(&dimm_config_table[0], 0,
+                                   DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR);
+               spd_tck_min =
+                   0xff & read_spd(&dimm_config_table[0], 0,
+                                   DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN);
+               spd_taa_min =
+                   0xff & read_spd(&dimm_config_table[0], 0,
+                                   DDR3_SPD_MIN_CAS_LATENCY_TAAMIN);
+
+               spd_twr =
+                   0xff & read_spd(&dimm_config_table[0], 0,
+                                   DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN);
+               spd_trcd =
+                   0xff & read_spd(&dimm_config_table[0], 0,
+                                   DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN);
+               spd_trrd =
+                   0xff & read_spd(&dimm_config_table[0], 0,
+                                   DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN);
+               spd_trp =
+                   0xff & read_spd(&dimm_config_table[0], 0,
+                                   DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN);
+               spd_tras =
+                   0xff & read_spd(&dimm_config_table[0], 0,
+                                   DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN);
+               spd_tras |=
+                   ((0xff &
+                     read_spd(&dimm_config_table[0], 0,
+                              DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8);
+               spd_trc =
+                   0xff & read_spd(&dimm_config_table[0], 0,
+                                   DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN);
+               spd_trc |=
+                   ((0xff &
+                     read_spd(&dimm_config_table[0], 0,
+                              DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf0) << 4);
+               spd_trfc =
+                   0xff & read_spd(&dimm_config_table[0], 0,
+                                   DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN);
+               spd_trfc |=
+                   ((0xff &
+                     read_spd(&dimm_config_table[0], 0,
+                              DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN)) <<
+                    8);
+               spd_twtr =
+                   0xff & read_spd(&dimm_config_table[0], 0,
+                               DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN);
+               spd_trtp =
+                   0xff & read_spd(&dimm_config_table[0], 0,
+                       DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN);
+               spd_tfaw =
+                   0xff & read_spd(&dimm_config_table[0], 0,
+                                   DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN);
+               spd_tfaw |=
+                   ((0xff &
+                     read_spd(&dimm_config_table[0], 0,
+                              DDR3_SPD_UPPER_NIBBLE_TFAW) & 0xf) << 8);
+               spd_addr_mirror =
+                   0xff & read_spd(&dimm_config_table[0], 0,
+                                   DDR3_SPD_ADDRESS_MAPPING) & 0x1;
+               /* Only address mirror unbuffered dimms.  */
+               spd_addr_mirror = spd_addr_mirror && !spd_rdimm;
+               ftb_dividend =
+                   read_spd(&dimm_config_table[0], 0,
+                            DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) >> 4;
+               ftb_divisor =
+                   read_spd(&dimm_config_table[0], 0,
+                            DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) & 0xf;
+               /* Make sure that it is not 0 */
+               ftb_divisor = (ftb_divisor == 0) ? 1 : ftb_divisor;
+
+               debug("spd_twr         : %#06x\n", spd_twr);
+               debug("spd_trcd        : %#06x\n", spd_trcd);
+               debug("spd_trrd        : %#06x\n", spd_trrd);
+               debug("spd_trp         : %#06x\n", spd_trp);
+               debug("spd_tras        : %#06x\n", spd_tras);
+               debug("spd_trc         : %#06x\n", spd_trc);
+               debug("spd_trfc        : %#06x\n", spd_trfc);
+               debug("spd_twtr        : %#06x\n", spd_twtr);
+               debug("spd_trtp        : %#06x\n", spd_trtp);
+               debug("spd_tfaw        : %#06x\n", spd_tfaw);
+               debug("spd_addr_mirror : %#06x\n", spd_addr_mirror);
+
+               mtb_psec = spd_mtb_dividend * 1000 / spd_mtb_divisor;
+               taamin = mtb_psec * spd_taa_min;
+               taamin += ftb_dividend *
+                       (signed char)read_spd(&dimm_config_table[0],
+                               0, DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN) /
+                       ftb_divisor;
+               tckmin = mtb_psec * spd_tck_min;
+               tckmin += ftb_dividend *
+                       (signed char)read_spd(&dimm_config_table[0],
+                               0, DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN) /
+                       ftb_divisor;
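+
+               /*
+                * Conversion example (assumed SPD values): a medium-timebase
+                * dividend/divisor of 1/8 gives mtb_psec = 1000 / 8 = 125 ps.
+                * A tCKmin byte of 10 then yields 10 * 125 = 1250 ps
+                * (DDR3-1600), and with the usual 1 ps fine timebase a
+                * fine-correction byte of -1 nudges that to 1249 ps.
+                */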
+
+               twr = spd_twr * mtb_psec;
+               trcd = spd_trcd * mtb_psec;
+               trrd = spd_trrd * mtb_psec;
+               trp = spd_trp * mtb_psec;
+               tras = spd_tras * mtb_psec;
+               trc = spd_trc * mtb_psec;
+               trfc = spd_trfc * mtb_psec;
+               if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) && trfc < 260000) {
+                       // default to this - because it works...
+                       int new_trfc = 260000;
+
+                       s = env_get("ddr_trfc");
+                       if (s) {
+                               new_trfc = simple_strtoul(s, NULL, 0);
+                               printf("Parameter found in environment. ddr_trfc = %d\n",
+                                      new_trfc);
+                               if (new_trfc < 160000 || new_trfc > 260000) {
+                                       // back to default if out of range
+                                       new_trfc = 260000;
+                               }
+                       }
+                       debug("N%d.LMC%d: Adjusting tRFC from %d to %d, for CN78XX Pass 2.x\n",
+                             node, if_num, trfc, new_trfc);
+                       trfc = new_trfc;
+               }
+
+               twtr = spd_twtr * mtb_psec;
+               trtp = spd_trtp * mtb_psec;
+               tfaw = spd_tfaw * mtb_psec;
+
+               debug("Medium Timebase (MTB)                         : %6d ps\n",
+                     mtb_psec);
+               debug("Minimum Cycle Time (tckmin)                   : %6d ps (%ld MT/s)\n",
+                     tckmin, pretty_psecs_to_mts(tckmin));
+               debug("Minimum CAS Latency Time (taamin)             : %6d ps\n",
+                     taamin);
+               debug("Write Recovery Time (tWR)                     : %6d ps\n",
+                     twr);
+               debug("Minimum RAS to CAS delay (tRCD)               : %6d ps\n",
+                     trcd);
+               debug("Minimum Row Active to Row Active delay (tRRD) : %6d ps\n",
+                     trrd);
+               debug("Minimum Row Precharge Delay (tRP)             : %6d ps\n",
+                     trp);
+               debug("Minimum Active to Precharge (tRAS)            : %6d ps\n",
+                     tras);
+               debug("Minimum Active to Active/Refresh Delay (tRC)  : %6d ps\n",
+                     trc);
+               debug("Minimum Refresh Recovery Delay (tRFC)         : %6d ps\n",
+                     trfc);
+               debug("Internal write to read command delay (tWTR)   : %6d ps\n",
+                     twtr);
+               debug("Min Internal Rd to Precharge Cmd Delay (tRTP) : %6d ps\n",
+                     trtp);
+               debug("Minimum Four Activate Window Delay (tFAW)     : %6d ps\n",
+                     tfaw);
+       }
+
+       /*
+        * When the cycle time is within 1 psec of the minimum accept it
+        * as a slight rounding error and adjust it to exactly the minimum
+        * cycle time. This avoids an unnecessary warning.
+        */
+       if (abs(tclk_psecs - tckmin) < 2)
+               tclk_psecs = tckmin;
+
+       if (tclk_psecs < (u64)tckmin) {
+               printf("WARNING!!!!: DDR Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin: %ld)!!!!\n",
+                      tclk_psecs, (ulong)tckmin);
+       }
+
+       debug("DDR Clock Rate (tCLK)                         : %6ld ps\n",
+             tclk_psecs);
+       debug("Core Clock Rate (eCLK)                        : %6ld ps\n",
+             eclk_psecs);
+
+       s = env_get("ddr_use_ecc");
+       if (s) {
+               use_ecc = !!simple_strtoul(s, NULL, 0);
+               printf("Parameter found in environment. ddr_use_ecc = %d\n",
+                      use_ecc);
+       }
+       use_ecc = use_ecc && spd_ecc;
+
+       if_bytemask = if_64b ? (use_ecc ? 0x1ff : 0xff)
+           : (use_ecc ? 0x01f : 0x0f);
+
+       debug("DRAM Interface width: %d bits %s bytemask 0x%03x\n",
+             if_64b ? 64 : 32, use_ecc ? "+ECC" : "", if_bytemask);
+
+       debug("\n------ Board Custom Configuration Settings ------\n");
+       debug("%-45s : %d\n", "MIN_RTT_NOM_IDX   ", c_cfg->min_rtt_nom_idx);
+       debug("%-45s : %d\n", "MAX_RTT_NOM_IDX   ", c_cfg->max_rtt_nom_idx);
+       debug("%-45s : %d\n", "MIN_RODT_CTL      ", c_cfg->min_rodt_ctl);
+       debug("%-45s : %d\n", "MAX_RODT_CTL      ", c_cfg->max_rodt_ctl);
+       debug("%-45s : %d\n", "MIN_CAS_LATENCY   ", c_cfg->min_cas_latency);
+       debug("%-45s : %d\n", "OFFSET_EN         ", c_cfg->offset_en);
+       debug("%-45s : %d\n", "OFFSET_UDIMM      ", c_cfg->offset_udimm);
+       debug("%-45s : %d\n", "OFFSET_RDIMM      ", c_cfg->offset_rdimm);
+       debug("%-45s : %d\n", "DDR_RTT_NOM_AUTO  ", c_cfg->ddr_rtt_nom_auto);
+       debug("%-45s : %d\n", "DDR_RODT_CTL_AUTO ", c_cfg->ddr_rodt_ctl_auto);
+       if (spd_rdimm)
+               debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET",
+                     c_cfg->rlevel_comp_offset_rdimm);
+       else
+               debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET",
+                     c_cfg->rlevel_comp_offset_udimm);
+       debug("%-45s : %d\n", "RLEVEL_COMPUTE    ", c_cfg->rlevel_compute);
+       debug("%-45s : %d\n", "DDR2T_UDIMM       ", c_cfg->ddr2t_udimm);
+       debug("%-45s : %d\n", "DDR2T_RDIMM       ", c_cfg->ddr2t_rdimm);
+       debug("%-45s : %d\n", "FPRCH2            ", c_cfg->fprch2);
+       debug("%-45s : %d\n", "PTUNE_OFFSET      ", c_cfg->ptune_offset);
+       debug("%-45s : %d\n", "NTUNE_OFFSET      ", c_cfg->ntune_offset);
+       debug("-------------------------------------------------\n");
+
+       cl = divide_roundup(taamin, tclk_psecs);
+
+       debug("Desired CAS Latency                           : %6d\n", cl);
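+
+       /*
+        * Example (assumed speed grade): taamin = 13750 ps at a DDR4-2133
+        * clock of tclk_psecs = 938 ps gives
+        *   cl = divide_roundup(13750, 938) = 15
+        * i.e. the lowest CAS latency that still satisfies tAAmin.
+        */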
+
+       min_cas_latency = c_cfg->min_cas_latency;
+
+       s = lookup_env(priv, "ddr_min_cas_latency");
+       if (s)
+               min_cas_latency = simple_strtoul(s, NULL, 0);
+
+       debug("CAS Latencies supported in DIMM               :");
+       base_cl = (ddr_type == DDR4_DRAM) ? 7 : 4;
+       for (i = 0; i < 32; ++i) {
+               if ((spd_cas_latency >> i) & 1) {
+                       debug(" %d", i + base_cl);
+                       max_cas_latency = i + base_cl;
+                       if (min_cas_latency == 0)
+                               min_cas_latency = i + base_cl;
+               }
+       }
+       debug("\n");
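+
+       /*
+        * Decoding example (assumed SPD contents): for DDR4, base_cl = 7,
+        * so a spd_cas_latency bitmap of 0x00000380 has bits 7..9 set and
+        * advertises CL 14, 15 and 16; min_cas_latency picks up the lowest
+        * of these unless the board config or environment overrides it.
+        */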
+
+       /*
+        * Use relaxed timing when running slower than the minimum
+        * supported speed.  Adjust timing to match the smallest supported
+        * CAS Latency.
+        */
+       if (min_cas_latency > cl) {
+               ulong adjusted_tclk = taamin / min_cas_latency;
+
+               cl = min_cas_latency;
+               debug("Slow clock speed. Adjusting timing: tClk = %ld, Adjusted tClk = %ld\n",
+                     tclk_psecs, adjusted_tclk);
+               tclk_psecs = adjusted_tclk;
+       }
+
+       s = env_get("ddr_cas_latency");
+       if (s) {
+               override_cas_latency = simple_strtoul(s, NULL, 0);
+               printf("Parameter found in environment. ddr_cas_latency = %d\n",
+                      override_cas_latency);
+       }
+
+       /* Make sure that the selected cas latency is legal */
+       for (i = (cl - base_cl); i < 32; ++i) {
+               if ((spd_cas_latency >> i) & 1) {
+                       cl = i + base_cl;
+                       break;
+               }
+       }
+
+       if (max_cas_latency < cl)
+               cl = max_cas_latency;
+
+       if (override_cas_latency != 0)
+               cl = override_cas_latency;
+
+       debug("CAS Latency                                   : %6d\n", cl);
+
+       if ((cl * tckmin) > 20000) {
+               debug("(CLactual * tckmin) = %d exceeds 20 ns\n",
+                     (cl * tckmin));
+       }
+
+       if (tclk_psecs < (ulong)tckmin) {
+               printf("WARNING!!!!!!: DDR Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin:%ld)!!!!!!!!\n",
+                      tclk_psecs, (ulong)tckmin);
+       }
+
+       if (num_banks != 4 && num_banks != 8 && num_banks != 16) {
+               printf("Unsupported number of banks %d. Must be 4, 8 or 16.\n",
+                      num_banks);
+               ++fatal_error;
+       }
+
+       if (num_ranks != 1 && num_ranks != 2 && num_ranks != 4) {
+               printf("Unsupported number of ranks: %d\n", num_ranks);
+               ++fatal_error;
+       }
+
+       if (octeon_is_cpuid(OCTEON_CN78XX) ||
+           octeon_is_cpuid(OCTEON_CN73XX) ||
+           octeon_is_cpuid(OCTEON_CNF75XX)) {
+               if (dram_width != 8 && dram_width != 16 && dram_width != 4) {
+                       printf("Unsupported SDRAM Width, %d.  Must be 4, 8 or 16.\n",
+                              dram_width);
+                       ++fatal_error;
+               }
+       } else if (dram_width != 8 && dram_width != 16) {
+               printf("Unsupported SDRAM Width, %d.  Must be 8 or 16.\n",
+                      dram_width);
+               ++fatal_error;
+       }
+
+       /*
+        ** Bail out here if things are not copasetic.
+        */
+       if (fatal_error)
+               return (-1);
+
+       /*
+        * 4.8.4 LMC RESET Initialization
+        *
+        * The purpose of this step is to assert/deassert the RESET# pin at the
+        * DDR3/DDR4 parts.
+        *
+        * This LMC RESET step is done for all enabled LMCs.
+        */
+       perform_lmc_reset(priv, node, if_num);
+
+       // Make sure scrambling is disabled during init...
+       ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
+       ctrl.s.scramble_ena = 0;
+       lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
+
+       lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), 0);
+       lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), 0);
+       if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
+               lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num), 0);
+
+       odt_idx = min(dimm_count - 1, 3);
+
+       switch (num_ranks) {
+       case 1:
+               odt_config = odt_1rank_config;
+               break;
+       case 2:
+               odt_config = odt_2rank_config;
+               break;
+       case 4:
+               odt_config = odt_4rank_config;
+               break;
+       default:
+               odt_config = disable_odt_config;
+               printf("Unsupported number of ranks: %d\n", num_ranks);
+               ++fatal_error;
+       }
+
+       /*
+        * 4.8.5 Early LMC Initialization
+        *
+        * All of DDR PLL, LMC CK, and LMC DRESET initializations must be
+        * completed prior to starting this LMC initialization sequence.
+        *
+        * Perform the following five substeps for early LMC initialization:
+        *
+        * 1. Software must ensure there are no pending DRAM transactions.
+        *
+        * 2. Write LMC(0)_CONFIG, LMC(0)_CONTROL, LMC(0)_TIMING_PARAMS0,
+        *    LMC(0)_TIMING_PARAMS1, LMC(0)_MODEREG_PARAMS0,
+        *    LMC(0)_MODEREG_PARAMS1, LMC(0)_DUAL_MEMCFG, LMC(0)_NXM,
+        *    LMC(0)_WODT_MASK, LMC(0)_RODT_MASK, LMC(0)_COMP_CTL2,
+        *    LMC(0)_PHY_CTL, LMC(0)_DIMM0/1_PARAMS, and LMC(0)_DIMM_CTL with
+        *    appropriate values. All sections in this chapter can be used to
+        *    derive proper register settings.
+        */
+
+       /* LMC(0)_CONFIG */
+       lmc_config(priv);
+
+       /* LMC(0)_CONTROL */
+       lmc_control(priv);
+
+       /* LMC(0)_TIMING_PARAMS0 */
+       lmc_timing_params0(priv);
+
+       /* LMC(0)_TIMING_PARAMS1 */
+       lmc_timing_params1(priv);
+
+       /* LMC(0)_TIMING_PARAMS2 */
+       lmc_timing_params2(priv);
+
+       /* LMC(0)_MODEREG_PARAMS0 */
+       lmc_modereg_params0(priv);
+
+       /* LMC(0)_MODEREG_PARAMS1 */
+       lmc_modereg_params1(priv);
+
+       /* LMC(0)_MODEREG_PARAMS2 */
+       lmc_modereg_params2(priv);
+
+       /* LMC(0)_MODEREG_PARAMS3 */
+       lmc_modereg_params3(priv);
+
+       /* LMC(0)_NXM */
+       lmc_nxm(priv);
+
+       /* LMC(0)_WODT_MASK */
+       lmc_wodt_mask(priv);
+
+       /* LMC(0)_RODT_MASK */
+       lmc_rodt_mask(priv);
+
+       /* LMC(0)_COMP_CTL2 */
+       lmc_comp_ctl2(priv);
+
+       /* LMC(0)_PHY_CTL */
+       lmc_phy_ctl(priv);
+
+       /* LMC(0)_EXT_CONFIG */
+       lmc_ext_config(priv);
+
+       /* LMC(0)_EXT_CONFIG2 */
+       lmc_ext_config2(priv);
+
+       /* LMC(0)_DIMM0/1_PARAMS */
+       lmc_dimm01_params(priv);
+
+       ret = lmc_rank_init(priv);
+       if (ret < 0)
+               return 0;       /* 0 indicates problem */
+
+       lmc_config_2(priv);
+
+       lmc_write_leveling(priv);
+
+       lmc_read_leveling(priv);
+
+       lmc_workaround(priv);
+
+       ret = lmc_sw_write_leveling(priv);
+       if (ret < 0)
+               return 0;       /* 0 indicates problem */
+
+       // Note: displaying the MPR pages sometimes causes stack overflow
+       // crashes, so display them only for DDR4 RDIMMs.
+       if (ddr_type == DDR4_DRAM && spd_rdimm) {
+               int i;
+
+               for (i = 0; i < 3; i += 2)      // just pages 0 and 2 for now..
+                       display_mpr_page(priv, rank_mask, if_num, i);
+       }
+
+       lmc_dll(priv);
+
+       lmc_workaround_2(priv);
+
+       lmc_final(priv);
+
+       lmc_scrambling(priv);
+
+       return mem_size_mbytes;
+}
+
+/////    HW-assist byte DLL offset tuning   //////
+
+static int cvmx_dram_get_num_lmc(struct ddr_priv *priv)
+{
+       union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2;
+
+       if (octeon_is_cpuid(OCTEON_CN70XX))
+               return 1;
+
+       if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX)) {
+               // sample LMC1
+               lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(1));
+               if (lmcx_dll_ctl2.cn78xx.intf_en)
+                       return 2;
+               else
+                       return 1;
+       }
+
+       // for CN78XX, LMCs are always active in pairs, and always LMC0/1
+       // so, we sample LMC2 to see if 2 and 3 are active
+       lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(2));
+       if (lmcx_dll_ctl2.cn78xx.intf_en)
+               return 4;
+       else
+               return 2;
+}
+
+// We have to define these here, even though they are already defined in the BDK
+
+// all DDR3, and DDR4 x16 today, use only 3 bank bits;
+// DDR4 x4 and x8 always have 4 bank bits
+// NOTE: this will change in the future, when DDR4 x16 devices can
+// come with 16 banks!! FIXME!!
+static int cvmx_dram_get_num_bank_bits(struct ddr_priv *priv, int lmc)
+{
+       union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2;
+       union cvmx_lmcx_config lmcx_config;
+       union cvmx_lmcx_ddr_pll_ctl lmcx_ddr_pll_ctl;
+       int bank_width;
+
+       // can always read this
+       lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(lmc));
+
+       if (lmcx_dll_ctl2.cn78xx.dreset)        // check LMCn
+               return 0;
+
+       lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
+       lmcx_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(lmc));
+
+       bank_width = ((lmcx_ddr_pll_ctl.s.ddr4_mode != 0) &&
+                     (lmcx_config.s.bg2_enable)) ? 4 : 3;
+
+       return bank_width;
+}
+
+#define EXTRACT(v, lsb, width) (((v) >> (lsb)) & ((1ull << (width)) - 1))
+#define ADDRESS_HOLE 0x10000000ULL
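+
+/*
+ * EXTRACT() pulls a bit field out of a physical address:
+ * EXTRACT(v, lsb, width) == (v >> lsb) & ((1ull << width) - 1), so for
+ * example (illustrative value) EXTRACT(0xabcd, 4, 8) == 0xbc.
+ * ADDRESS_HOLE (256 MB) is the hole compensated for in the decode below.
+ */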
+
+static void cvmx_dram_address_extract_info(struct ddr_priv *priv, u64 address,
+                                          int *node, int *lmc, int *dimm,
+                                          int *prank, int *lrank, int *bank,
+                                          int *row, int *col)
+{
+       int bank_lsb, xbits;
+       union cvmx_l2c_ctl l2c_ctl;
+       union cvmx_lmcx_config lmcx_config;
+       union cvmx_lmcx_control lmcx_control;
+       union cvmx_lmcx_ext_config ext_config;
+       int bitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18;
+       int bank_width;
+       int dimm_lsb;
+       int dimm_width;
+       int prank_lsb, lrank_lsb;
+       int prank_width, lrank_width;
+       int row_lsb;
+       int row_width;
+       int col_hi_lsb;
+       int col_hi_width;
+       int col_hi;
+
+       if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX))
+               bitno = 18;
+
+       *node = EXTRACT(address, 40, 2);        /* Address bits [41:40] */
+
+       address &= (1ULL << 40) - 1;    // lop off any node bits or above
+       if (address >= ADDRESS_HOLE)    // adjust down if at HOLE or above
+               address -= ADDRESS_HOLE;
+
+       /* Determine the LMC controllers */
+       l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL);
+
+       /* xbits depends on number of LMCs */
+       xbits = cvmx_dram_get_num_lmc(priv) >> 1;       // 4->2, 2->1, 1->0
+       bank_lsb = 7 + xbits;
+
+       /* LMC number is probably aliased */
+       if (l2c_ctl.s.disidxalias) {
+               *lmc = EXTRACT(address, 7, xbits);
+       } else {
+               *lmc = EXTRACT(address, 7, xbits) ^
+                       EXTRACT(address, bitno, xbits) ^
+                       EXTRACT(address, 12, xbits);
+       }
+
+       /* Figure out the bank field width */
+       lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(*lmc));
+       ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(*lmc));
+       bank_width = cvmx_dram_get_num_bank_bits(priv, *lmc);
+
+       /* Extract additional info from the LMC_CONFIG CSR */
+       dimm_lsb = 28 + lmcx_config.s.pbank_lsb + xbits;
+       dimm_width = 40 - dimm_lsb;
+       prank_lsb = dimm_lsb - lmcx_config.s.rank_ena;
+       prank_width = dimm_lsb - prank_lsb;
+       lrank_lsb = prank_lsb - ext_config.s.dimm0_cid;
+       lrank_width = prank_lsb - lrank_lsb;
+       row_lsb = 14 + lmcx_config.s.row_lsb + xbits;
+       row_width = lrank_lsb - row_lsb;
+       col_hi_lsb = bank_lsb + bank_width;
+       col_hi_width = row_lsb - col_hi_lsb;
+
+       /* Extract the parts of the address */
+       *dimm = EXTRACT(address, dimm_lsb, dimm_width);
+       *prank = EXTRACT(address, prank_lsb, prank_width);
+       *lrank = EXTRACT(address, lrank_lsb, lrank_width);
+       *row = EXTRACT(address, row_lsb, row_width);
+
+       /* bank calculation may be aliased... */
+       lmcx_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(*lmc));
+       if (lmcx_control.s.xor_bank) {
+               *bank = EXTRACT(address, bank_lsb, bank_width) ^
+                       EXTRACT(address, 12 + xbits, bank_width);
+       } else {
+               *bank = EXTRACT(address, bank_lsb, bank_width);
+       }
+
+       /* LMC number already extracted */
+       col_hi = EXTRACT(address, col_hi_lsb, col_hi_width);
+       *col = EXTRACT(address, 3, 4) | (col_hi << 4);
+       /* Bus byte is address bits [2:0]. Unused here */
+}
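+
+/*
+ * Usage sketch (hypothetical caller; fail_addr is an assumed variable):
+ * the decode above maps a physical address back onto its DRAM coordinates,
+ * e.g. when reporting a failing address.
+ *
+ *     int node, lmc, dimm, prank, lrank, bank, row, col;
+ *
+ *     cvmx_dram_address_extract_info(priv, fail_addr, &node, &lmc, &dimm,
+ *                                    &prank, &lrank, &bank, &row, &col);
+ *     printf("addr 0x%llx -> LMC%d DIMM%d rank %d bank %d row 0x%x col 0x%x\n",
+ *            fail_addr, lmc, dimm, prank, bank, row, col);
+ */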
+
+// end of added workarounds
+
+// NOTE: "mode" argument:
+//         DBTRAIN_TEST: for testing using GP patterns, includes ECC
+//         DBTRAIN_DBI:  for DBI deskew training behavior (uses GP patterns)
+//         DBTRAIN_LFSR: for testing using LFSR patterns, includes ECC
+// NOTE: trust the caller to specify the correct/supported mode
+//
+static int test_dram_byte_hw(struct ddr_priv *priv, int if_num, u64 p,
+                            int mode, u64 *xor_data)
+{
+       u64 p1;
+       u64 k;
+       int errors = 0;
+
+       u64 mpr_data0, mpr_data1;
+       u64 bad_bits[2] = { 0, 0 };
+
+       int node_address, lmc, dimm;
+       int prank, lrank;
+       int bank, row, col;
+       int save_or_dis;
+       int byte;
+       int ba_loop, ba_bits;
+
+       union cvmx_lmcx_rlevel_ctl rlevel_ctl;
+       union cvmx_lmcx_dbtrain_ctl dbtrain_ctl;
+       union cvmx_lmcx_phy_ctl phy_ctl;
+
+       int biter_errs;
+
+       // FIXME: K iterations set to 4 for now.
+       // FIXME: decrement kshift to increase the number of iterations.
+       // FIXME: must be no less than 22 to stay above an LMC hash field.
+       int kshift = 27;
+
+       const char *s;
+       int node = 0;
+
+       // allow override default setting for kshift
+       s = env_get("ddr_tune_set_kshift");
+       if (s) {
+               int temp = simple_strtoul(s, NULL, 0);
+
+               if (temp < 22 || temp > 28) {
+                       debug("N%d.LMC%d: ILLEGAL override of kshift to %d, using default %d\n",
+                             node, if_num, temp, kshift);
+               } else {
+                       debug("N%d.LMC%d: overriding kshift (%d) to %d\n",
+                             node, if_num, kshift, temp);
+                       kshift = temp;
+               }
+       }
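+
+       /*
+        * With the defaults above, the address loop below covers the 512 MB
+        * window in strides of (1 << kshift): kshift = 27 gives
+        * 2^(29 - 27) = 4 iterations, while the minimum of 22 (to stay above
+        * the LMC hash field) would give 128 iterations.
+        */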
+
+       /*
+        * 1) Make sure that RLEVEL_CTL[OR_DIS] = 0.
+        */
+       rlevel_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num));
+       save_or_dis = rlevel_ctl.s.or_dis;
+       /* or_dis must be disabled for this sequence */
+       rlevel_ctl.s.or_dis = 0;
+       lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64);
+
+       /*
+        * NOTE: this step done in the calling routine(s)...
+        * 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern
+        * of choice.
+        * a. GENERAL_PURPOSE0[DATA<63:0>] - sets the initial lower
+        * (rising edge) 64 bits of data.
+        * b. GENERAL_PURPOSE1[DATA<63:0>] - sets the initial upper
+        * (falling edge) 64 bits of data.
+        * c. GENERAL_PURPOSE2[DATA<15:0>] - sets the initial lower
+        * (rising edge <7:0>) and upper (falling edge <15:8>) ECC data.
+        */
+
+       // final address must include LMC and node
+       p |= (if_num << 7);     /* Map address into proper interface */
+       p |= (u64)node << CVMX_NODE_MEM_SHIFT;  // map to node
+
+       /*
+        * Add base offset to both test regions to not clobber u-boot stuff
+        * when running from L2 for NAND boot.
+        */
+       p += 0x20000000;        // offset to 512MB, ie above THE HOLE!!!
+       p |= 1ull << 63;        // needed for OCTEON
+
+       errors = 0;
+
+       cvmx_dram_address_extract_info(priv, p, &node_address, &lmc, &dimm,
+                                      &prank, &lrank, &bank, &row, &col);
+       debug("%s: START at A:0x%012llx, N%d L%d D%d/%d R%d B%1x Row:%05x Col:%05x\n",
+             __func__, p, node_address, lmc, dimm, prank, lrank, bank,
+             row, col);
+
+       // only check once per call, and ignore if no match...
+       if ((int)node != node_address) {
+               printf("ERROR: Node address mismatch\n");
+               return 0;
+       }
+       if (lmc != if_num) {
+               printf("ERROR: LMC address mismatch\n");
+               return 0;
+       }
+
+       /*
+        * 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically clears this as
+        * it's a one-shot operation). This is to get into the habit of
+        * resetting the PHY's SILO to the original 0 location.
+        */
+       phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
+       phy_ctl.s.phy_reset = 1;
+       lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
+
+       /*
+        * Walk through a range of addresses avoiding bits that alias
+        * interfaces on the CN88XX.
+        */
+
+       // FIXME: want to try to keep the K increment from affecting the
+       // LMC via hash, so keep it above bit 21; we also want to keep k
+       // less than the base offset of bit 29 (512MB)
+
+       for (k = 0; k < (1UL << 29); k += (1UL << kshift)) {
+               // FIXME: the sequence will iterate over 1/2 cacheline
+               // FIXME: for each unit specified in "read_cmd_count",
+               // FIXME: so, we setup each sequence to do the max cachelines
+               // it can
+
+               p1 = p + k;
+
+               cvmx_dram_address_extract_info(priv, p1, &node_address, &lmc,
+                                              &dimm, &prank, &lrank, &bank,
+                                              &row, &col);
+
+               /*
+                * 2) Setup the fields of the CSR DBTRAIN_CTL as follows:
+                * a. COL, ROW, BA, BG, PRANK points to the starting point
+                * of the address.
+                * You can just set them to all 0.
+                * b. RW_TRAIN - set this to 1.
+                * c. TCCD_L - set this to 0.
+                * d. READ_CMD_COUNT - instructs the sequence how many
+                * writes/reads to issue.
+                * It is a 5-bit field, so set it to 31 for the maximum
+                * number of reads/writes.
+                */
+               dbtrain_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DBTRAIN_CTL(if_num));
+               dbtrain_ctl.s.column_a = col;
+               dbtrain_ctl.s.row_a = row;
+               dbtrain_ctl.s.bg = (bank >> 2) & 3;
+               dbtrain_ctl.s.prank = (dimm * 2) + prank;       // FIXME?
+               dbtrain_ctl.s.lrank = lrank;    // FIXME?
+               dbtrain_ctl.s.activate = (mode == DBTRAIN_DBI);
+               dbtrain_ctl.s.write_ena = 1;
+               dbtrain_ctl.s.read_cmd_count = 31;      // max count pass 1.x
+               if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
+                   octeon_is_cpuid(OCTEON_CNF75XX)) {
+                       // max count on chips that support it
+                       dbtrain_ctl.s.cmd_count_ext = 3;
+               } else {
+                       // max count pass 1.x
+                       dbtrain_ctl.s.cmd_count_ext = 0;
+               }
+
+               dbtrain_ctl.s.rw_train = 1;
+               dbtrain_ctl.s.tccd_sel = (mode == DBTRAIN_DBI);
+               // LFSR should only be on when chip supports it...
+               dbtrain_ctl.s.lfsr_pattern_sel = (mode == DBTRAIN_LFSR) ? 1 : 0;
+
+               biter_errs = 0;
+
+               // for each address, iterate over the 4 "banks" in the BA
+               for (ba_loop = 0, ba_bits = bank & 3;
+                    ba_loop < 4; ba_loop++, ba_bits = (ba_bits + 1) & 3) {
+                       dbtrain_ctl.s.ba = ba_bits;
+                       lmc_wr(priv, CVMX_LMCX_DBTRAIN_CTL(if_num),
+                              dbtrain_ctl.u64);
+
+                       /*
+                        * We will use the RW_TRAINING sequence (14) for
+                        * this task.
+                        *
+                        * 4) Kick off the sequence (SEQ_CTL[SEQ_SEL] = 14,
+                        *    SEQ_CTL[INIT_START] = 1).
+                        * 5) Poll on SEQ_CTL[SEQ_COMPLETE] for completion.
+                        */
+                       oct3_ddr3_seq(priv, prank, if_num, 14);
+
+                       /*
+                        * 6) Read MPR_DATA0 and MPR_DATA1 for results.
+                        * a. MPR_DATA0[MPR_DATA<63:0>] - comparison results
+                        *    for DQ63:DQ0. (1 means MATCH, 0 means FAIL).
+                        * b. MPR_DATA1[MPR_DATA<7:0>] - comparison results
+                        *    for ECC bit7:0.
+                        */
+                       mpr_data0 = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num));
+                       mpr_data1 = lmc_rd(priv, CVMX_LMCX_MPR_DATA1(if_num));
+
+                       /*
+                        * 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically
+                        * clears this as it's a one-shot operation).
+                        * This is to get into the habit of resetting the PHY's
+                        * SILO to the original 0 location.
+                        */
+                       phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
+                       phy_ctl.s.phy_reset = 1;
+                       lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
+
+                       // bypass any error checking or updating when DBI mode
+                       if (mode == DBTRAIN_DBI)
+                               continue;
+
+                       // data bytes
+                       if (~mpr_data0) {
+                               for (byte = 0; byte < 8; byte++) {
+                                       if ((~mpr_data0 >> (8 * byte)) & 0xffUL)
+                                               biter_errs |= (1 << byte);
+                               }
+                               // accumulate bad bits
+                               bad_bits[0] |= ~mpr_data0;
+                       }
+
+                       // include ECC byte errors
+                       if (~mpr_data1 & 0xffUL) {
+                               biter_errs |= (1 << 8);
+                               bad_bits[1] |= ~mpr_data1 & 0xffUL;
+                       }
+               }
+
+               errors |= biter_errs;
+       }                       /* end for (k=...) */
+
+       rlevel_ctl.s.or_dis = save_or_dis;
+       lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64);
+
+       // send the bad bits back...
+       if (mode != DBTRAIN_DBI && xor_data) {
+               xor_data[0] = bad_bits[0];
+               xor_data[1] = bad_bits[1];
+       }
+
+       return errors;
+}
+
+// setup default for byte test pattern array
+// take these from the HRM section 6.9.13
+static const u64 byte_pattern_0[] = {
+       0xFFAAFFFFFF55FFFFULL,  // GP0
+       0x55555555AAAAAAAAULL,  // GP1
+       0xAA55AAAAULL,          // GP2
+};
+
+static const u64 byte_pattern_1[] = {
+       0xFBF7EFDFBF7FFEFDULL,  // GP0
+       0x0F1E3C78F0E1C387ULL,  // GP1
+       0xF0E1BF7FULL,          // GP2
+};
+
+// this is from Andrew via LFSR with PRBS=0xFFFFAAAA
+static const u64 byte_pattern_2[] = {
+       0xEE55AADDEE55AADDULL,  // GP0
+       0x55AADDEE55AADDEEULL,  // GP1
+       0x55EEULL,              // GP2
+};
+
+// this is from Mike via LFSR with PRBS=0x4A519909
+static const u64 byte_pattern_3[] = {
+       0x0088CCEE0088CCEEULL,  // GP0
+       0xBB552211BB552211ULL,  // GP1
+       0xBB00ULL,              // GP2
+};
+
+static const u64 *byte_patterns[4] = {
+       byte_pattern_0, byte_pattern_1, byte_pattern_2, byte_pattern_3
+};
+
+static const u32 lfsr_patterns[4] = {
+       0xFFFFAAAAUL, 0x06000000UL, 0xAAAAFFFFUL, 0x4A519909UL
+};
+
+#define NUM_BYTE_PATTERNS 4
+
+#define DEFAULT_BYTE_BURSTS 32 // compromise between time and rigor
+
+static void setup_hw_pattern(struct ddr_priv *priv, int lmc,
+                            const u64 *pattern_p)
+{
+       /*
+        * 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern
+        * of choice.
+        * a. GENERAL_PURPOSE0[DATA<63:0>] - sets the initial lower
+        *    (rising edge) 64 bits of data.
+        * b. GENERAL_PURPOSE1[DATA<63:0>] - sets the initial upper
+        *    (falling edge) 64 bits of data.
+        * c. GENERAL_PURPOSE2[DATA<15:0>] - sets the initial lower
+        *    (rising edge <7:0>) and upper (falling edge <15:8>) ECC data.
+        */
+       lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), pattern_p[0]);
+       lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), pattern_p[1]);
+       lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), pattern_p[2]);
+}
+
+static void setup_lfsr_pattern(struct ddr_priv *priv, int lmc, u32 data)
+{
+       union cvmx_lmcx_char_ctl char_ctl;
+       u32 prbs;
+       const char *s;
+
+       s = env_get("ddr_lfsr_prbs");
+       if (s)
+               prbs = simple_strtoul(s, NULL, 0);
+       else
+               prbs = data;
+
+       /*
+        * 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
+        * here data comes from the LFSR generating a PRBS pattern
+        * CHAR_CTL.EN = 0
+        * CHAR_CTL.SEL = 0; // for PRBS
+        * CHAR_CTL.DR = 1;
+        * CHAR_CTL.PRBS = setup for whatever type of PRBS to send
+        * CHAR_CTL.SKEW_ON = 1;
+        */
+       char_ctl.u64 = lmc_rd(priv, CVMX_LMCX_CHAR_CTL(lmc));
+       char_ctl.s.en = 0;
+       char_ctl.s.sel = 0;
+       char_ctl.s.dr = 1;
+       char_ctl.s.prbs = prbs;
+       char_ctl.s.skew_on = 1;
+       lmc_wr(priv, CVMX_LMCX_CHAR_CTL(lmc), char_ctl.u64);
+}
+
+static int choose_best_hw_patterns(int lmc, int mode)
+{
+       int new_mode = mode;
+       const char *s;
+
+       switch (mode) {
+       case DBTRAIN_TEST:      // always choose LFSR if chip supports it
+               if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) {
+                       int lfsr_enable = 1;
+
+                       s = env_get("ddr_allow_lfsr");
+                       if (s) {
+                               // override?
+                               lfsr_enable = !!strtoul(s, NULL, 0);
+                       }
+
+                       if (lfsr_enable)
+                               new_mode = DBTRAIN_LFSR;
+               }
+               break;
+
+       case DBTRAIN_DBI:       // possibly can allow LFSR use?
+               break;
+
+       case DBTRAIN_LFSR:      // forced already
+               if (!octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) {
+                       debug("ERROR: illegal HW assist mode %d\n", mode);
+                       new_mode = DBTRAIN_TEST;
+               }
+               break;
+
+       default:
+               debug("ERROR: unknown HW assist mode %d\n", mode);
+       }
+
+       if (new_mode != mode)
+               debug("%s: changing mode %d to %d\n", __func__, mode, new_mode);
+
+       return new_mode;
+}
+
+int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr,
+                        int mode, u64 *xor_data)
+{
+       int pattern;
+       const u64 *pattern_p;
+       int errs, errors = 0;
+
+       // FIXME? always choose LFSR if chip supports it???
+       mode = choose_best_hw_patterns(lmc, mode);
+
+       for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
+               if (mode == DBTRAIN_LFSR) {
+                       setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]);
+               } else {
+                       pattern_p = byte_patterns[pattern];
+                       setup_hw_pattern(priv, lmc, pattern_p);
+               }
+               errs = test_dram_byte_hw(priv, lmc, phys_addr, mode, xor_data);
+
+               debug("%s: PATTERN %d at A:0x%012llx errors 0x%x\n",
+                     __func__, pattern, phys_addr, errs);
+
+               errors |= errs;
+       }
+
+       return errors;
+}
+
+static void hw_assist_test_dll_offset(struct ddr_priv *priv,
+                                     int dll_offset_mode, int lmc,
+                                     int bytelane,
+                                     int if_64b,
+                                     u64 dram_tune_rank_offset,
+                                     int dram_tune_byte_bursts)
+{
+       int byte_offset, new_best_offset[9];
+       int rank_delay_start[4][9];
+       int rank_delay_count[4][9];
+       int rank_delay_best_start[4][9];
+       int rank_delay_best_count[4][9];
+       int errors[4], off_errors, tot_errors;
+       int rank_mask, rankx, active_ranks;
+       int pattern;
+       const u64 *pattern_p;
+       int byte;
+       char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write";
+       int pat_best_offset[9];
+       u64 phys_addr;
+       int pat_beg, pat_end;
+       int rank_beg, rank_end;
+       int byte_lo, byte_hi;
+       union cvmx_lmcx_config lmcx_config;
+       u64 hw_rank_offset;
+       int num_lmcs = cvmx_dram_get_num_lmc(priv);
+       // FIXME? always choose LFSR if chip supports it???
+       int mode = choose_best_hw_patterns(lmc, DBTRAIN_TEST);
+       int node = 0;
+
+       if (bytelane == 0x0A) { // all bytelanes
+               byte_lo = 0;
+               byte_hi = 8;
+       } else {                // just 1
+               byte_lo = bytelane;
+               byte_hi = bytelane;
+       }
+
+       lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
+       rank_mask = lmcx_config.s.init_status;
+
+       // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
+       hw_rank_offset =
+           1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena +
+                    (num_lmcs / 2));
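+       // Illustrative example (assumed values): with pbank_lsb = 0 (rank
+       // select at address bit 28), rank_ena = 1 and two LMCs this
+       // evaluates to 1ull << (28 + 0 - 1 + 1), i.e. a 256 MiB stride
+       // between ranks.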
+
+       debug("N%d: %s: starting LMC%d with rank offset 0x%016llx\n",
+             node, __func__, lmc, (unsigned long long)hw_rank_offset);
+
+       // start of pattern loop
+       // we do the set of tests for each pattern supplied...
+
+       memset(new_best_offset, 0, sizeof(new_best_offset));
+       for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
+               memset(pat_best_offset, 0, sizeof(pat_best_offset));
+
+               if (mode == DBTRAIN_TEST) {
+                       pattern_p = byte_patterns[pattern];
+                       setup_hw_pattern(priv, lmc, pattern_p);
+               } else {
+                       setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]);
+               }
+
+               // now loop through all legal values for the DLL byte offset...
+
+#define BYTE_OFFSET_INCR 3     // FIXME: make this tunable?
+
+               tot_errors = 0;
+
+               memset(rank_delay_count, 0, sizeof(rank_delay_count));
+               memset(rank_delay_start, 0, sizeof(rank_delay_start));
+               memset(rank_delay_best_count, 0, sizeof(rank_delay_best_count));
+               memset(rank_delay_best_start, 0, sizeof(rank_delay_best_start));
+
+               for (byte_offset = -63; byte_offset < 64;
+                    byte_offset += BYTE_OFFSET_INCR) {
+                       // do the setup on the active LMC
+                       // set the bytelanes DLL offsets
+                       change_dll_offset_enable(priv, lmc, 0);
+                       // FIXME? bytelane?
+                       load_dll_offset(priv, lmc, dll_offset_mode,
+                                       byte_offset, bytelane);
+                       change_dll_offset_enable(priv, lmc, 1);
+
+                       //bdk_watchdog_poke();
+
+                       // run the test on each rank
+                       // only 1 call per rank should be enough, let the
+                       // bursts, loops, etc, control the load...
+
+                       // errors for this byte_offset, all ranks
+                       off_errors = 0;
+
+                       active_ranks = 0;
+
+                       for (rankx = 0; rankx < 4; rankx++) {
+                               if (!(rank_mask & (1 << rankx)))
+                                       continue;
+
+                               phys_addr = hw_rank_offset * active_ranks;
+                               // FIXME: now done by test_dram_byte_hw()
+                               //phys_addr |= (lmc << 7);
+                               //phys_addr |= (u64)node << CVMX_NODE_MEM_SHIFT;
+
+                               active_ranks++;
+
+                               // NOTE: return is a now a bitmask of the
+                               // erroring bytelanes.
+                               errors[rankx] =
+                                   test_dram_byte_hw(priv, lmc, phys_addr,
+                                                     mode, NULL);
+
+                               // process any errors in the bytelane(s) that
+                               // are being tested
+                               for (byte = byte_lo; byte <= byte_hi; byte++) {
+                                       // check errors
+                                       // yes, an error in the byte lane in
+                                       // this rank
+                                       if (errors[rankx] & (1 << byte)) {
+                                               off_errors |= (1 << byte);
+
+                                               debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: Address 0x%012llx errors\n",
+                                                     node, lmc, rankx, byte,
+                                                     mode_str, byte_offset,
+                                                     phys_addr);
+
+                                               // had started run
+                                               if (rank_delay_count
+                                                   [rankx][byte] > 0) {
+                                                       debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: stopping a run here\n",
+                                                             node, lmc, rankx,
+                                                             byte, mode_str,
+                                                             byte_offset);
+                                                       // stop now
+                                                       rank_delay_count
+                                                               [rankx][byte] =
+                                                               0;
+                                               }
+                                               // FIXME: else had not started
+                                               // run - nothing else to do?
+                                       } else {
+                                               // no error in the byte lane
+                                               // first success, set run start
+                                               if (rank_delay_count[rankx]
+                                                   [byte] == 0) {
+                                                       debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: starting a run here\n",
+                                                             node, lmc, rankx,
+                                                             byte, mode_str,
+                                                             byte_offset);
+                                                       rank_delay_start[rankx]
+                                                               [byte] =
+                                                               byte_offset;
+                                               }
+                                               // bump run length
+                                               rank_delay_count[rankx][byte]
+                                                       += BYTE_OFFSET_INCR;
+
+                                               // is this now the biggest
+                                               // window?
+                                               if (rank_delay_count[rankx]
+                                                   [byte] >
+                                                   rank_delay_best_count[rankx]
+                                                   [byte]) {
+                                                       rank_delay_best_count
+                                                           [rankx][byte] =
+                                                           rank_delay_count
+                                                           [rankx][byte];
+                                                       rank_delay_best_start
+                                                           [rankx][byte] =
+                                                           rank_delay_start
+                                                           [rankx][byte];
+                                                       debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: updating best to %d/%d\n",
+                                                             node, lmc, rankx,
+                                                             byte, mode_str,
+                                                             byte_offset,
+                                                             rank_delay_best_start
+                                                             [rankx][byte],
+                                                             rank_delay_best_count
+                                                             [rankx][byte]);
+                                               }
+                                       }
+                               }
+                       } /* for (rankx = 0; rankx < 4; rankx++) */
+
+                       tot_errors |= off_errors;
+               }
+
+               // set the bytelanes DLL offsets all back to 0
+               change_dll_offset_enable(priv, lmc, 0);
+               load_dll_offset(priv, lmc, dll_offset_mode, 0, bytelane);
+               change_dll_offset_enable(priv, lmc, 1);
+
+               // now choose the best byte_offsets for this pattern
+               // according to the best windows of the tested ranks
+               // calculate offset by constructing an average window
+               // from the rank windows
+               for (byte = byte_lo; byte <= byte_hi; byte++) {
+                       pat_beg = -999;
+                       pat_end = 999;
+
+                       for (rankx = 0; rankx < 4; rankx++) {
+                               if (!(rank_mask & (1 << rankx)))
+                                       continue;
+
+                               rank_beg = rank_delay_best_start[rankx][byte];
+                               pat_beg = max(pat_beg, rank_beg);
+                               rank_end = rank_beg +
+                                       rank_delay_best_count[rankx][byte] -
+                                       BYTE_OFFSET_INCR;
+                               pat_end = min(pat_end, rank_end);
+
+                               debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test:  Rank Window %3d:%3d\n",
+                                     node, lmc, rankx, byte, mode_str,
+                                     rank_beg, rank_end);
+
+                       }       /* for (rankx = 0; rankx < 4; rankx++) */
+
+                       pat_best_offset[byte] = (pat_end + pat_beg) / 2;
+
+                       // sum the pattern averages
+                       new_best_offset[byte] += pat_best_offset[byte];
+               }
+
+               // now print them on 1 line, descending order...
+               debug("N%d.LMC%d: HW DLL %s Offset Pattern %d :",
+                     node, lmc, mode_str, pattern);
+               for (byte = byte_hi; byte >= byte_lo; --byte)
+                       debug(" %4d", pat_best_offset[byte]);
+               debug("\n");
+       }
+       // end of pattern loop
+
+       debug("N%d.LMC%d: HW DLL %s Offset Average  : ", node, lmc, mode_str);
+
+       // print in descending byte index order
+       for (byte = byte_hi; byte >= byte_lo; --byte) {
+               // create the new average NINT
+               new_best_offset[byte] = divide_nint(new_best_offset[byte],
+                                                   NUM_BYTE_PATTERNS);
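+               // i.e. turn the per-pattern sums accumulated above into a
+               // per-byte average; divide_nint() is assumed to round to
+               // the nearest integer rather than truncate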
+
+               // print the best offsets from all patterns
+
+               // print just the offset of all the bytes
+               if (bytelane == 0x0A)
+                       debug("%4d ", new_best_offset[byte]);
+               else            // print the bytelanes also
+                       debug("(byte %d) %4d ", byte, new_best_offset[byte]);
+
+               // done with testing, load up the best offsets we found...
+               // disable offsets while we load...
+               change_dll_offset_enable(priv, lmc, 0);
+               load_dll_offset(priv, lmc, dll_offset_mode,
+                               new_best_offset[byte], byte);
+               // re-enable the offsets now that we are done loading
+               change_dll_offset_enable(priv, lmc, 1);
+       }
+
+       debug("\n");
+}
+
+/*
+ * Automatically adjust the DLL offset for the selected bytelane using
+ * hardware-assist
+ */
+static int perform_HW_dll_offset_tuning(struct ddr_priv *priv,
+                                       int dll_offset_mode, int bytelane)
+{
+       int if_64b;
+       int save_ecc_ena[4];
+       union cvmx_lmcx_config lmc_config;
+       int lmc, num_lmcs = cvmx_dram_get_num_lmc(priv);
+       const char *s;
+       int loops = 1, loop;
+       int by;
+       u64 dram_tune_rank_offset;
+       int dram_tune_byte_bursts = DEFAULT_BYTE_BURSTS;
+       int node = 0;
+
+       // see if we want to do the tuning more than once per LMC...
+       s = env_get("ddr_tune_ecc_loops");
+       if (s)
+               loops = strtoul(s, NULL, 0);
+
+       // allow override of the test repeats (bursts)
+       s = env_get("ddr_tune_byte_bursts");
+       if (s)
+               dram_tune_byte_bursts = strtoul(s, NULL, 10);
+
+       // print current working values
+       debug("N%d: H/W Tuning for bytelane %d will use %d loops, %d bursts, and %d patterns.\n",
+             node, bytelane, loops, dram_tune_byte_bursts, NUM_BYTE_PATTERNS);
+
+       // FIXME? get flag from LMC0 only
+       lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0));
+       if_64b = !lmc_config.s.mode32b;
+
+       // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
+       dram_tune_rank_offset =
+           1ull << (28 + lmc_config.s.pbank_lsb - lmc_config.s.rank_ena +
+                    (num_lmcs / 2));
+
+       // do once for each active LMC
+
+       for (lmc = 0; lmc < num_lmcs; lmc++) {
+               debug("N%d: H/W Tuning: starting LMC%d bytelane %d tune.\n",
+                     node, lmc, bytelane);
+
+               /* Enable ECC for the HW tests */
+               // NOTE: we do enable ECC, but the HW tests used will not
+               // generate "visible" errors
+               lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
+               save_ecc_ena[lmc] = lmc_config.s.ecc_ena;
+               lmc_config.s.ecc_ena = 1;
+               lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64);
+               lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
+
+               // testing is done on a single LMC at a time
+               // FIXME: for now, loop here to show what happens multiple times
+               for (loop = 0; loop < loops; loop++) {
+                       /* Perform DLL offset tuning */
+                       hw_assist_test_dll_offset(priv, 2 /* 2=read */, lmc,
+                                                 bytelane,
+                                                 if_64b, dram_tune_rank_offset,
+                                                 dram_tune_byte_bursts);
+               }
+
+               // perform cleanup on active LMC
+               debug("N%d: H/W Tuning: finishing LMC%d bytelane %d tune.\n",
+                     node, lmc, bytelane);
+
+               /* Restore ECC for DRAM tests */
+               lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
+               lmc_config.s.ecc_ena = save_ecc_ena[lmc];
+               lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64);
+               lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
+
+               // finally, see if there are any read offset overrides
+               // after tuning
+               for (by = 0; by < 9; by++) {
+                       s = lookup_env(priv, "ddr%d_tune_byte%d", lmc, by);
+                       if (s) {
+                               int dllro = strtoul(s, NULL, 10);
+
+                               change_dll_offset_enable(priv, lmc, 0);
+                               load_dll_offset(priv, lmc, 2, dllro, by);
+                               change_dll_offset_enable(priv, lmc, 1);
+                       }
+               }
+
+       }                       /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+       // finish up...
+
+       return 0;
+
+}                              /* perform_HW_dll_offset_tuning */
+
+// this routine simply makes the calls to the tuning routine and returns
+// any errors
+static int cvmx_tune_node(struct ddr_priv *priv)
+{
+       int errs, tot_errs;
+       int do_dllwo = 0;       // default to NO
+       const char *str;
+       int node = 0;
+
+       // Automatically tune the data and ECC byte DLL read offsets
+       debug("N%d: Starting DLL Read Offset Tuning for LMCs\n", node);
+       errs = perform_HW_dll_offset_tuning(priv, 2, 0x0A /* all bytelanes */);
+       debug("N%d: Finished DLL Read Offset Tuning for LMCs, %d errors\n",
+             node, errs);
+       tot_errs = errs;
+
+       // disabled by default for now, does not seem to be needed?
+       // Automatically tune the data and ECC byte DLL write offsets
+       // allow override of default setting
+       str = env_get("ddr_tune_write_offsets");
+       if (str)
+               do_dllwo = !!strtoul(str, NULL, 0);
+       if (do_dllwo) {
+               debug("N%d: Starting DLL Write Offset Tuning for LMCs\n", node);
+               errs =
+                   perform_HW_dll_offset_tuning(priv, 1,
+                                                0x0A /* all bytelanes */);
+               debug("N%d: Finished DLL Write Offset Tuning for LMCs, %d errors\n",
+                     node, errs);
+               tot_errs += errs;
+       }
+
+       return tot_errs;
+}
+
+// this routine makes the calls to the tuning routines when criteria are met
+// intended to be called for automated tuning, to apply filtering...
+
+#define IS_DDR4  1
+#define IS_DDR3  0
+#define IS_RDIMM 1
+#define IS_UDIMM 0
+#define IS_1SLOT 1
+#define IS_2SLOT 0
+
+// FIXME: DDR3 is not tuned
+static const u32 ddr_speed_filter[2][2][2] = {
+       [IS_DDR4] = {
+                    [IS_RDIMM] = {
+                                  [IS_1SLOT] = 940,
+                                  [IS_2SLOT] = 800},
+                    [IS_UDIMM] = {
+                                  [IS_1SLOT] = 1050,
+                                  [IS_2SLOT] = 940},
+                     },
+       [IS_DDR3] = {
+                    [IS_RDIMM] = {
+                                  [IS_1SLOT] = 0,      // disabled
+                                  [IS_2SLOT] = 0       // disabled
+                                  },
+                    [IS_UDIMM] = {
+                                  [IS_1SLOT] = 0,      // disabled
+                                  [IS_2SLOT] = 0       // disabled
+                               }
+               }
+};
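+// Example of how this table is used by cvmx_maybe_tune_node() below: a
+// DDR4 RDIMM single-slot configuration is auto-tuned only above 940 MHz,
+// while all DDR3 configurations are currently excluded.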
+
+void cvmx_maybe_tune_node(struct ddr_priv *priv, u32 ddr_speed)
+{
+       const char *s;
+       union cvmx_lmcx_config lmc_config;
+       union cvmx_lmcx_control lmc_control;
+       union cvmx_lmcx_ddr_pll_ctl lmc_ddr_pll_ctl;
+       int is_ddr4;
+       int is_rdimm;
+       int is_1slot;
+       int do_tune = 0;
+       u32 ddr_min_speed;
+       int node = 0;
+
+       // scale it down from Hz to MHz
+       ddr_speed = divide_nint(ddr_speed, 1000000);
+
+       // FIXME: allow an override here so that all configs can be tuned
+       // or none
+       // If the envvar is defined, always either force it or avoid it
+       // accordingly
+       s = env_get("ddr_tune_all_configs");
+       if (s) {
+               do_tune = !!strtoul(s, NULL, 0);
+               printf("N%d: DRAM auto-tuning %s.\n", node,
+                      (do_tune) ? "forced" : "disabled");
+               if (do_tune)
+                       cvmx_tune_node(priv);
+
+               return;
+       }
+
+       // filter the tuning calls here...
+       // determine if we should/can run automatically for this configuration
+       //
+       // FIXME: tune only when the configuration indicates it will help:
+       //    DDR type, RDIMM or UDIMM, 1-slot or 2-slot, and speed
+       //
+       lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0));     // sample LMC0
+       lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(0));   // sample LMC0
+       // sample LMC0
+       lmc_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0));
+
+       is_ddr4 = (lmc_ddr_pll_ctl.s.ddr4_mode != 0);
+       is_rdimm = (lmc_control.s.rdimm_ena != 0);
+       // HACK, should do better
+       is_1slot = (lmc_config.s.init_status < 4);
+
+       ddr_min_speed = ddr_speed_filter[is_ddr4][is_rdimm][is_1slot];
+       do_tune = ((ddr_min_speed != 0) && (ddr_speed > ddr_min_speed));
+
+       debug("N%d: DDR%d %cDIMM %d-slot at %d MHz %s eligible for auto-tuning.\n",
+             node, (is_ddr4) ? 4 : 3, (is_rdimm) ? 'R' : 'U',
+             (is_1slot) ? 1 : 2, ddr_speed, (do_tune) ? "is" : "is not");
+
+       // call the tuning routine, filtering is done...
+       if (do_tune)
+               cvmx_tune_node(priv);
+}
+
+/*
+ * first pattern example:
+ * GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
+ * GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
+ * GENERAL_PURPOSE2.DATA == 16'h0000;
+ */
+
+static const u64 dbi_pattern[3] = {
+       0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000ULL };
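+// These three words are written to GENERAL_PURPOSE0/1/2 by
+// cvmx_dbi_switchover_interface() below; element [2] is the 16-bit
+// ECC pattern.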
+
+// Perform switchover to DBI
+static void cvmx_dbi_switchover_interface(struct ddr_priv *priv, int lmc)
+{
+       union cvmx_lmcx_modereg_params0 modereg_params0;
+       union cvmx_lmcx_modereg_params3 modereg_params3;
+       union cvmx_lmcx_phy_ctl phy_ctl;
+       union cvmx_lmcx_config lmcx_config;
+       union cvmx_lmcx_ddr_pll_ctl ddr_pll_ctl;
+       int rank_mask, rankx, active_ranks;
+       u64 phys_addr, rank_offset;
+       int num_lmcs, errors;
+       int dbi_settings[9], byte, unlocked, retries;
+       int ecc_ena;
+       int rank_max = 1;       // FIXME: make this 4 to try all the ranks
+       int node = 0;
+
+       ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0));
+
+       lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
+       rank_mask = lmcx_config.s.init_status;
+       ecc_ena = lmcx_config.s.ecc_ena;
+
+       // FIXME: must filter out any non-supported configs
+       //        ie, no DDR3, no x4 devices
+       if (ddr_pll_ctl.s.ddr4_mode == 0 || lmcx_config.s.mode_x4dev == 1) {
+               debug("N%d.LMC%d: DBI switchover: inappropriate device; EXITING...\n",
+                     node, lmc);
+               return;
+       }
+
+       // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
+       num_lmcs = cvmx_dram_get_num_lmc(priv);
+       rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb -
+                              lmcx_config.s.rank_ena + (num_lmcs / 2));
+
+       debug("N%d.LMC%d: DBI switchover: rank mask 0x%x, rank size 0x%016llx.\n",
+             node, lmc, rank_mask, (unsigned long long)rank_offset);
+
+       /*
+        * 1. conduct the current init sequence as usual all the way
+        * after software write leveling.
+        */
+
+       read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings);
+
+       display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings,
+                                " INIT");
+
+       /*
+        * 2. set DBI related CSRs as below and issue MR write.
+        * MODEREG_PARAMS3.WR_DBI=1
+        * MODEREG_PARAMS3.RD_DBI=1
+        * PHY_CTL.DBI_MODE_ENA=1
+        */
+       modereg_params0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc));
+
+       modereg_params3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc));
+       modereg_params3.s.wr_dbi = 1;
+       modereg_params3.s.rd_dbi = 1;
+       lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc), modereg_params3.u64);
+
+       phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(lmc));
+       phy_ctl.s.dbi_mode_ena = 1;
+       lmc_wr(priv, CVMX_LMCX_PHY_CTL(lmc), phy_ctl.u64);
+
+       /*
+        * There are two options for data to send. Let's start with (1)
+        * and could move to (2) in the future:
+        *
+        * 1) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 0 (or for older chips where
+        * this does not exist) set data directly in these registers.
+        * This will yield a clk/2 pattern:
+        * GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
+        * GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
+        * GENERAL_PURPOSE2.DATA == 16'h0000;
+        * 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
+        * here data comes from the LFSR generating a PRBS pattern
+        * CHAR_CTL.EN = 0
+        * CHAR_CTL.SEL = 0; // for PRBS
+        * CHAR_CTL.DR = 1;
+        * CHAR_CTL.PRBS = setup for whatever type of PRBS to send
+        * CHAR_CTL.SKEW_ON = 1;
+        */
+       lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), dbi_pattern[0]);
+       lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), dbi_pattern[1]);
+       lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), dbi_pattern[2]);
+
+       /*
+        * 3. adjust cas_latency (only necessary if RD_DBI is set).
+        * here is my code for doing this:
+        *
+        * if (csr_model.MODEREG_PARAMS3.RD_DBI.value == 1) begin
+        * case (csr_model.MODEREG_PARAMS0.CL.value)
+        * 0,1,2,3,4: csr_model.MODEREG_PARAMS0.CL.value += 2;
+        * // CL 9-13 -> 11-15
+        * 5: begin
+        * // CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3
+        * if((csr_model.MODEREG_PARAMS0.CWL.value==1 ||
+        * csr_model.MODEREG_PARAMS0.CWL.value==3))
+        * csr_model.MODEREG_PARAMS0.CL.value = 7; // 14->16
+        * else
+        * csr_model.MODEREG_PARAMS0.CL.value = 13; // 14->17
+        * end
+        * 6: csr_model.MODEREG_PARAMS0.CL.value = 8; // 15->18
+        * 7: csr_model.MODEREG_PARAMS0.CL.value = 14; // 16->19
+        * 8: csr_model.MODEREG_PARAMS0.CL.value = 15; // 18->21
+        * default:
+        * `cn_fatal(("Error mem_cfg (%s) CL (%d) with RD_DBI=1,
+        * I am not sure what to do.",
+        * mem_cfg, csr_model.MODEREG_PARAMS3.RD_DBI.value))
+        * endcase
+        * end
+        */
+
+       if (modereg_params3.s.rd_dbi == 1) {
+               int old_cl, new_cl, old_cwl;
+
+               old_cl = modereg_params0.s.cl;
+               old_cwl = modereg_params0.s.cwl;
+
+               switch (old_cl) {
+               case 0:
+               case 1:
+               case 2:
+               case 3:
+               case 4:
+                       new_cl = old_cl + 2;
+                       break;  // 9-13->11-15
+                       // CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3
+               case 5:
+                       new_cl = ((old_cwl == 1) || (old_cwl == 3)) ? 7 : 13;
+                       break;
+               case 6:
+                       new_cl = 8;
+                       break;  // 15->18
+               case 7:
+                       new_cl = 14;
+                       break;  // 16->19
+               case 8:
+                       new_cl = 15;
+                       break;  // 18->21
+               default:
+                       printf("ERROR: Bad CL value (%d) for DBI switchover.\n",
+                              old_cl);
+                       // FIXME: need to error exit here...
+                       old_cl = -1;
+                       new_cl = -1;
+                       break;
+               }
+               debug("N%d.LMC%d: DBI switchover: CL ADJ: old_cl 0x%x, old_cwl 0x%x, new_cl 0x%x.\n",
+                     node, lmc, old_cl, old_cwl, new_cl);
+               modereg_params0.s.cl = new_cl;
+               lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc),
+                      modereg_params0.u64);
+       }
+
+       /*
+        * 4. issue MRW to MR0 (CL) and MR5 (DBI), using LMC sequence
+        * SEQ_CTL[SEQ_SEL] = MRW.
+        */
+       // Use the default values from the CSR fields
+       // also, do B-sides for RDIMMs...
+
+       for (rankx = 0; rankx < 4; rankx++) {
+               if (!(rank_mask & (1 << rankx)))
+                       continue;
+
+               // for RDIMMs, B-side writes should get done automatically
+               // when the A-side is written
+               ddr4_mrw(priv, lmc, rankx, -1 /* use_default */,
+                        0 /*MRreg */, 0 /*A-side */);  /* MR0 */
+               ddr4_mrw(priv, lmc, rankx, -1 /* use_default */,
+                        5 /*MRreg */, 0 /*A-side */);  /* MR5 */
+       }
+
+       /*
+        * 5. conduct DBI bit deskew training via the General Purpose
+        * R/W sequence (dbtrain). may need to run this over and over to get
+        * a lock (I need up to 5 in simulation):
+        * SEQ_CTL[SEQ_SEL] = RW_TRAINING (15)
+        * DBTRAIN_CTL.CMD_COUNT_EXT = all 1's
+        * DBTRAIN_CTL.READ_CMD_COUNT = all 1's
+        * DBTRAIN_CTL.TCCD_SEL = set according to MODEREG_PARAMS3[TCCD_L]
+        * DBTRAIN_CTL.RW_TRAIN = 1
+        * DBTRAIN_CTL.READ_DQ_COUNT = dont care
+        * DBTRAIN_CTL.WRITE_ENA = 1;
+        * DBTRAIN_CTL.ACTIVATE = 1;
+        * DBTRAIN_CTL LRANK, PRANK, ROW_A, BG, BA, COLUMN_A = set to a
+        * valid address
+        */
+
+       // NOW - do the training
+       debug("N%d.LMC%d: DBI switchover: TRAINING begins...\n", node, lmc);
+
+       active_ranks = 0;
+       for (rankx = 0; rankx < rank_max; rankx++) {
+               if (!(rank_mask & (1 << rankx)))
+                       continue;
+
+               phys_addr = rank_offset * active_ranks;
+               // FIXME: now done by test_dram_byte_hw()
+
+               active_ranks++;
+
+               retries = 0;
+
+restart_training:
+
+               // NOTE: return is a bitmask of the erroring bytelanes -
+               // we only print it
+               errors =
+                   test_dram_byte_hw(priv, lmc, phys_addr, DBTRAIN_DBI, NULL);
+
+               debug("N%d.LMC%d: DBI switchover: TEST: rank %d, phys_addr 0x%llx, errors 0x%x.\n",
+                     node, lmc, rankx, (unsigned long long)phys_addr, errors);
+
+               // NEXT - check for locking
+               unlocked = 0;
+               read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings);
+
+               for (byte = 0; byte < (8 + ecc_ena); byte++)
+                       unlocked += (dbi_settings[byte] & 1) ^ 1;
+
+               // FIXME: print out the DBI settings array after each rank?
+               if (rank_max > 1)       // only when doing more than 1 rank
+                       display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena,
+                                                dbi_settings, " RANK");
+
+               if (unlocked > 0) {
+                       debug("N%d.LMC%d: DBI switchover: LOCK: %d still unlocked.\n",
+                             node, lmc, unlocked);
+                       retries++;
+                       if (retries < 10) {
+                               goto restart_training;
+                       } else {
+                               debug("N%d.LMC%d: DBI switchover: LOCK: %d retries exhausted.\n",
+                                     node, lmc, retries);
+                       }
+               }
+       }                       /* for (rankx = 0; rankx < 4; rankx++) */
+
+       // print out the final DBI settings array
+       display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings,
+                                "FINAL");
+}
+
+void cvmx_dbi_switchover(struct ddr_priv *priv)
+{
+       int lmc;
+       int num_lmcs = cvmx_dram_get_num_lmc(priv);
+
+       for (lmc = 0; lmc < num_lmcs; lmc++)
+               cvmx_dbi_switchover_interface(priv, lmc);
+}
diff --git a/drivers/ram/octeon/octeon_ddr.c b/drivers/ram/octeon/octeon_ddr.c
new file mode 100644 (file)
index 0000000..757436b
--- /dev/null
@@ -0,0 +1,2728 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#include <command.h>
+#include <config.h>
+#include <dm.h>
+#include <hang.h>
+#include <i2c.h>
+#include <ram.h>
+#include <time.h>
+
+#include <asm/sections.h>
+#include <linux/io.h>
+
+#include <mach/octeon_ddr.h>
+
+#define CONFIG_REF_HERTZ       50000000
+
+DECLARE_GLOBAL_DATA_PTR;
+
+/* Sign of an integer: returns 1 if negative, 0 otherwise */
+static s64 _sign(s64 v)
+{
+       return (v < 0);
+}
+
+#ifndef DDR_NO_DEBUG
+char *lookup_env(struct ddr_priv *priv, const char *format, ...)
+{
+       char *s;
+       unsigned long value;
+       va_list args;
+       char buffer[64];
+
+       va_start(args, format);
+       vsnprintf(buffer, sizeof(buffer), format, args);
+       va_end(args);
+
+       s = ddr_getenv_debug(priv, buffer);
+       if (s) {
+               value = simple_strtoul(s, NULL, 0);
+               printf("Parameter found in environment %s=\"%s\" 0x%lx (%ld)\n",
+                      buffer, s, value, value);
+       }
+
+       return s;
+}
+
+char *lookup_env_ull(struct ddr_priv *priv, const char *format, ...)
+{
+       char *s;
+       u64 value;
+       va_list args;
+       char buffer[64];
+
+       va_start(args, format);
+       vsnprintf(buffer, sizeof(buffer), format, args);
+       va_end(args);
+
+       s = ddr_getenv_debug(priv, buffer);
+       if (s) {
+               value = simple_strtoull(s, NULL, 0);
+               printf("Parameter found in environment. %s = 0x%016llx\n",
+                      buffer, value);
+       }
+
+       return s;
+}
+#else
+char *lookup_env(struct ddr_priv *priv, const char *format, ...)
+{
+       return NULL;
+}
+
+char *lookup_env_ull(struct ddr_priv *priv, const char *format, ...)
+{
+       return NULL;
+}
+#endif
+
+/* Number of L2C Tag-and-data sections (TADs) that are connected to LMC. */
+#define CVMX_L2C_TADS  ((OCTEON_IS_MODEL(OCTEON_CN68XX) ||             \
+                        OCTEON_IS_MODEL(OCTEON_CN73XX) ||              \
+                        OCTEON_IS_MODEL(OCTEON_CNF75XX)) ? 4 :         \
+                       (OCTEON_IS_MODEL(OCTEON_CN78XX)) ? 8 : 1)
+
+/* Number of L2C IOBs connected to LMC. */
+#define CVMX_L2C_IOBS  ((OCTEON_IS_MODEL(OCTEON_CN68XX) ||             \
+                        OCTEON_IS_MODEL(OCTEON_CN78XX) ||              \
+                        OCTEON_IS_MODEL(OCTEON_CN73XX) ||              \
+                        OCTEON_IS_MODEL(OCTEON_CNF75XX)) ? 2 : 1)
+
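+/* Maximum allowed DRAM size, in GB (compared below against mem_size in MB) */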
+#define CVMX_L2C_MAX_MEMSZ_ALLOWED (OCTEON_IS_OCTEON2() ?              \
+                                   (32 * CVMX_L2C_TADS) :              \
+                                   (OCTEON_IS_MODEL(OCTEON_CN70XX) ?   \
+                                    512 : (OCTEON_IS_OCTEON3() ? 1024 : 0)))
+
+/**
+ * Initialize the BIG address in L2C+DRAM to generate a proper error
+ * on reading/writing to a non-existent memory location.
+ *
+ * @param node      OCX CPU node number
+ * @param mem_size  Amount of DRAM configured in MB.
+ * @param mode      Allow/Disallow reporting errors L2C_INT_SUM[BIGRD,BIGWR].
+ */
+static void cvmx_l2c_set_big_size(struct ddr_priv *priv, u64 mem_size, int mode)
+{
+       if ((OCTEON_IS_OCTEON2() || OCTEON_IS_OCTEON3()) &&
+           !OCTEON_IS_MODEL(OCTEON_CN63XX_PASS1_X)) {
+               union cvmx_l2c_big_ctl big_ctl;
+               int bits = 0, zero_bits = 0;
+               u64 mem;
+
+               if (mem_size > (CVMX_L2C_MAX_MEMSZ_ALLOWED * 1024ull)) {
+                       printf("WARNING: Invalid memory size(%lld) requested, should be <= %lld\n",
+                              mem_size,
+                              (u64)CVMX_L2C_MAX_MEMSZ_ALLOWED * 1024);
+                       mem_size = CVMX_L2C_MAX_MEMSZ_ALLOWED * 1024;
+               }
+
+               mem = mem_size;
+               while (mem) {
+                       if ((mem & 1) == 0)
+                               zero_bits++;
+                       bits++;
+                       mem >>= 1;
+               }
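+               /*
+                * After this loop, 'bits' is the 1-based position of the
+                * highest set bit and (bits - zero_bits) == 1 only when
+                * mem_size is a power of two. Illustrative example:
+                * mem_size = 1024 (MB) gives bits = 11, zero_bits = 10,
+                * and MAXDRAM below becomes bits - 9 = 2.
+                */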
+
+               if ((bits - zero_bits) != 1 || (bits - 9) <= 0) {
+                       printf("ERROR: Invalid DRAM size (%lld) requested, refer to L2C_BIG_CTL[maxdram] for valid options.\n",
+                              mem_size);
+                       return;
+               }
+
+               /*
+                * The BIG/HOLE logic is not supported in pass 1, as per
+                * Errata L2C-17736
+                */
+               if (mode == 0 && OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X))
+                       mode = 1;
+
+               big_ctl.u64 = 0;
+               big_ctl.s.maxdram = bits - 9;
+               big_ctl.cn61xx.disable = mode;
+               l2c_wr(priv, CVMX_L2C_BIG_CTL, big_ctl.u64);
+       }
+}
+
+static u32 octeon3_refclock(u32 alt_refclk, u32 ddr_hertz,
+                           struct dimm_config *dimm_config)
+{
+       u32 ddr_ref_hertz = CONFIG_REF_HERTZ;
+       int ddr_type;
+       int spd_dimm_type;
+
+       debug("%s(%u, %u, %p)\n", __func__, alt_refclk, ddr_hertz, dimm_config);
+
+       /* Octeon 3 case... */
+
+       /*
+        * We know whether the alternate refclk is always wanted, and we
+        * also know already whether we want 2133 MT/s. If the alt refclk
+        * is not always wanted, probe the DDR and DIMM type; if DDR4 and
+        * RDIMMs, set the desired refclk to 100 MHz, otherwise leave it at
+        * the default (50 MHz). Depend on ddr_initialize() to do the
+        * refclk selection and validation.
+        */
+       if (alt_refclk) {
+               /*
+                * If alternate refclk was specified, let it override
+                * everything
+                */
+               ddr_ref_hertz = alt_refclk * 1000000;
+               printf("%s: DRAM init: %d MHz refclk is REQUESTED ALWAYS\n",
+                      __func__, alt_refclk);
+       } else if (ddr_hertz > 1000000000) {
+               ddr_type = get_ddr_type(dimm_config, 0);
+               spd_dimm_type = get_dimm_module_type(dimm_config, 0, ddr_type);
+
+               debug("ddr type: 0x%x, dimm type: 0x%x\n", ddr_type,
+                     spd_dimm_type);
+               /* Make sure it is DDR4 and an RDIMM. */
+               if (ddr_type == DDR4_DRAM &&
+                   (spd_dimm_type == 1 || spd_dimm_type == 5 ||
+                    spd_dimm_type == 8)) {
+                       /* Yes, we require 100MHz refclk, so set it. */
+                       ddr_ref_hertz = 100000000;
+                       puts("DRAM init: 100 MHz refclk is REQUIRED\n");
+               }
+       }
+
+       debug("%s: speed: %u\n", __func__, ddr_ref_hertz);
+       return ddr_ref_hertz;
+}
+
+int encode_row_lsb_ddr3(int row_lsb)
+{
+       int row_lsb_start = 14;
+
+       /* Decoding for row_lsb        */
+       /* 000: row_lsb = mem_adr[14]  */
+       /* 001: row_lsb = mem_adr[15]  */
+       /* 010: row_lsb = mem_adr[16]  */
+       /* 011: row_lsb = mem_adr[17]  */
+       /* 100: row_lsb = mem_adr[18]  */
+       /* 101: row_lsb = mem_adr[19]  */
+       /* 110: row_lsb = mem_adr[20]  */
+       /* 111: RESERVED               */
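+       /* e.g. (illustrative) row_lsb = 16 encodes to 2, i.e. mem_adr[16] */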
+
+       if (octeon_is_cpuid(OCTEON_CN6XXX) ||
+           octeon_is_cpuid(OCTEON_CNF7XXX) || octeon_is_cpuid(OCTEON_CN7XXX))
+               row_lsb_start = 14;
+       else
+               printf("ERROR: Unsupported Octeon model: 0x%x\n",
+                      read_c0_prid());
+
+       return row_lsb - row_lsb_start;
+}
+
+int encode_pbank_lsb_ddr3(int pbank_lsb)
+{
+       /* Decoding for pbank_lsb                                        */
+       /* 0000:DIMM = mem_adr[28]    / rank = mem_adr[27] (if RANK_ENA) */
+       /* 0001:DIMM = mem_adr[29]    / rank = mem_adr[28]      "        */
+       /* 0010:DIMM = mem_adr[30]    / rank = mem_adr[29]      "        */
+       /* 0011:DIMM = mem_adr[31]    / rank = mem_adr[30]      "        */
+       /* 0100:DIMM = mem_adr[32]    / rank = mem_adr[31]      "        */
+       /* 0101:DIMM = mem_adr[33]    / rank = mem_adr[32]      "        */
+       /* 0110:DIMM = mem_adr[34]    / rank = mem_adr[33]      "        */
+       /* 0111:DIMM = 0              / rank = mem_adr[34]      "        */
+       /* 1000-1111: RESERVED                                           */
+
+       int pbank_lsb_start = 0;
+
+       if (octeon_is_cpuid(OCTEON_CN6XXX) ||
+           octeon_is_cpuid(OCTEON_CNF7XXX) || octeon_is_cpuid(OCTEON_CN7XXX))
+               pbank_lsb_start = 28;
+       else
+               printf("ERROR: Unsupported Octeon model: 0x%x\n",
+                      read_c0_prid());
+
+       return pbank_lsb - pbank_lsb_start;
+}
+
+static void set_ddr_clock_initialized(struct ddr_priv *priv, int if_num,
+                                     bool inited_flag)
+{
+       priv->ddr_clock_initialized[if_num] = inited_flag;
+}
+
+static int ddr_clock_initialized(struct ddr_priv *priv, int if_num)
+{
+       return priv->ddr_clock_initialized[if_num];
+}
+
+static void set_ddr_memory_preserved(struct ddr_priv *priv)
+{
+       priv->ddr_memory_preserved = true;
+}
+
+bool ddr_memory_preserved(struct ddr_priv *priv)
+{
+       return priv->ddr_memory_preserved;
+}
+
+static void cn78xx_lmc_dreset_init(struct ddr_priv *priv, int if_num)
+{
+       union cvmx_lmcx_dll_ctl2 dll_ctl2;
+
+       /*
+        * The remainder of this section describes the sequence for LMCn.
+        *
+        * 1. If not done already, write LMC(0..3)_DLL_CTL2 to its reset value
+        * (except without changing the LMC(0..3)_DLL_CTL2[INTF_EN] value from
+        * that set in the prior Step 3), including
+        * LMC(0..3)_DLL_CTL2[DRESET] = 1.
+        *
+        * 2. Without changing any other LMC(0..3)_DLL_CTL2 fields, write
+        * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] = 1.
+        */
+
+       dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num));
+       dll_ctl2.cn78xx.dll_bringup = 1;
+       lmc_wr(priv, CVMX_LMCX_DLL_CTL2(if_num), dll_ctl2.u64);
+
+       /*
+        * 3. Read LMC(0..3)_DLL_CTL2 and wait for the result.
+        */
+
+       lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num));
+
+       /*
+        * 4. Wait for a minimum of 10 LMC CK cycles.
+        */
+
+       udelay(1);
+
+       /*
+        * 5. Without changing any other fields in LMC(0..3)_DLL_CTL2, write
+        * LMC(0..3)_DLL_CTL2[QUAD_DLL_ENA] = 1.
+        * LMC(0..3)_DLL_CTL2[QUAD_DLL_ENA] must not change after this point
+        * without restarting the LMCn DRESET initialization sequence.
+        */
+
+       dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num));
+       dll_ctl2.cn78xx.quad_dll_ena = 1;
+       lmc_wr(priv, CVMX_LMCX_DLL_CTL2(if_num), dll_ctl2.u64);
+
+       /*
+        * 6. Read LMC(0..3)_DLL_CTL2 and wait for the result.
+        */
+
+       lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num));
+
+       /*
+        * 7. Wait a minimum of 10 us.
+        */
+
+       udelay(10);
+
+       /*
+        * 8. Without changing any other fields in LMC(0..3)_DLL_CTL2, write
+        * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] = 0.
+        * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] must not change after this point
+        * without restarting the LMCn DRESET initialization sequence.
+        */
+
+       dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num));
+       dll_ctl2.cn78xx.dll_bringup = 0;
+       lmc_wr(priv, CVMX_LMCX_DLL_CTL2(if_num), dll_ctl2.u64);
+
+       /*
+        * 9. Read LMC(0..3)_DLL_CTL2 and wait for the result.
+        */
+
+       lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num));
+
+       /*
+        * 10. Without changing any other fields in LMC(0..3)_DLL_CTL2, write
+        * LMC(0..3)_DLL_CTL2[DRESET] = 0.
+        * LMC(0..3)_DLL_CTL2[DRESET] must not change after this point without
+        * restarting the LMCn DRESET initialization sequence.
+        *
+        * After completing LMCn DRESET initialization, all LMC CSRs may be
+        * accessed.  Prior to completing LMC DRESET initialization, only
+        * LMC(0..3)_DDR_PLL_CTL, LMC(0..3)_DLL_CTL2, LMC(0..3)_RESET_CTL, and
+        * LMC(0..3)_COMP_CTL2 LMC CSRs can be accessed.
+        */
+
+       dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num));
+       dll_ctl2.cn78xx.dreset = 0;
+       lmc_wr(priv, CVMX_LMCX_DLL_CTL2(if_num), dll_ctl2.u64);
+}
+
+int initialize_ddr_clock(struct ddr_priv *priv, struct ddr_conf *ddr_conf,
+                        u32 cpu_hertz, u32 ddr_hertz, u32 ddr_ref_hertz,
+                        int if_num, u32 if_mask)
+{
+       char *s;
+
+       if (ddr_clock_initialized(priv, if_num))
+               return 0;
+
+       if (!ddr_clock_initialized(priv, 0)) {  /* Do this once */
+               union cvmx_lmcx_reset_ctl reset_ctl;
+               int i;
+
+               /*
+                * Check to see if memory is to be preserved and set global
+                * flag
+                */
+               for (i = 3; i >= 0; --i) {
+                       if ((if_mask & (1 << i)) == 0)
+                               continue;
+
+                       reset_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RESET_CTL(i));
+                       if (reset_ctl.s.ddr3psv == 1) {
+                               debug("LMC%d Preserving memory\n", i);
+                               set_ddr_memory_preserved(priv);
+
+                               /* Re-initialize flags */
+                               reset_ctl.s.ddr3pwarm = 0;
+                               reset_ctl.s.ddr3psoft = 0;
+                               reset_ctl.s.ddr3psv = 0;
+                               lmc_wr(priv, CVMX_LMCX_RESET_CTL(i),
+                                      reset_ctl.u64);
+                       }
+               }
+       }
+
+       /*
+        * ToDo: Add support for these SoCs:
+        *
+        * if (octeon_is_cpuid(OCTEON_CN63XX) ||
+        * octeon_is_cpuid(OCTEON_CN66XX) ||
+        * octeon_is_cpuid(OCTEON_CN61XX) || octeon_is_cpuid(OCTEON_CNF71XX))
+        *
+        * and
+        *
+        * if (octeon_is_cpuid(OCTEON_CN68XX))
+        *
+        * and
+        *
+        * if (octeon_is_cpuid(OCTEON_CN70XX))
+        *
+        */
+
+       if (octeon_is_cpuid(OCTEON_CN78XX) || octeon_is_cpuid(OCTEON_CN73XX) ||
+           octeon_is_cpuid(OCTEON_CNF75XX)) {
+               union cvmx_lmcx_dll_ctl2 dll_ctl2;
+               union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
+               union cvmx_lmcx_ddr_pll_ctl ddr_pll_ctl;
+               struct dimm_config *dimm_config_table =
+                       ddr_conf->dimm_config_table;
+               int en_idx, save_en_idx, best_en_idx = 0;
+               u64 clkf, clkr, max_clkf = 127;
+               u64 best_clkf = 0, best_clkr = 0;
+               u64 best_pll_MHz = 0;
+               u64 pll_MHz;
+               u64 min_pll_MHz = 800;
+               u64 max_pll_MHz = 5000;
+               u64 error;
+               u64 best_error;
+               u64 best_calculated_ddr_hertz = 0;
+               u64 calculated_ddr_hertz = 0;
+               u64 orig_ddr_hertz = ddr_hertz;
+               const int _en[] = { 1, 2, 3, 4, 5, 6, 7, 8, 10, 12 };
+               int override_pll_settings;
+               int new_bwadj;
+               int ddr_type;
+               int i;
+
+               /* ddr_type only indicates DDR4 or DDR3 */
+               ddr_type = (read_spd(&dimm_config_table[0], 0,
+                                    DDR4_SPD_KEY_BYTE_DEVICE_TYPE) ==
+                           0x0C) ? DDR4_DRAM : DDR3_DRAM;
+
+               /*
+                * 5.9 LMC Initialization Sequence
+                *
+                * There are 13 parts to the LMC initialization procedure:
+                *
+                * 1. DDR PLL initialization
+                *
+                * 2. LMC CK initialization
+                *
+                * 3. LMC interface enable initialization
+                *
+                * 4. LMC DRESET initialization
+                *
+                * 5. LMC CK local initialization
+                *
+                * 6. LMC RESET initialization
+                *
+                * 7. Early LMC initialization
+                *
+                * 8. LMC offset training
+                *
+                * 9. LMC internal Vref training
+                *
+                * 10. LMC deskew training
+                *
+                * 11. LMC write leveling
+                *
+                * 12. LMC read leveling
+                *
+                * 13. Final LMC initialization
+                *
+                * CN78XX supports two modes:
+                *
+                * - two-LMC mode: both LMCs 2/3 must not be enabled
+                * (LMC2/3_DLL_CTL2[DRESET] must be set to 1 and
+                * LMC2/3_DLL_CTL2[INTF_EN] must be set to 0), and both
+                * LMCs 0/1 must be enabled.
+                *
+                * - four-LMC mode: all four LMCs 0..3 must be enabled.
+                *
+                * Steps 4 and 6..13 should each be performed for each
+                * enabled LMC (either twice or four times). Steps 1..3 and
+                * 5 are more global in nature and each must be executed
+                * exactly once (not once per LMC) each time the DDR PLL
+                * changes or is first brought up. Steps 1..3 and 5 need
+                * not be performed if the DDR PLL is stable.
+                *
+                * Generally, the steps are performed in order. The exception
+                * is that the CK local initialization (step 5) must be
+                * performed after some DRESET initializations (step 4) and
+                * before other DRESET initializations when the DDR PLL is
+                * brought up or changed. (The CK local initialization uses
+                * information from some LMCs to bring up the other local
+                * CKs.) The following text describes these ordering
+                * requirements in more detail.
+                *
+                * Following any chip reset, the DDR PLL must be brought up,
+                * and all 13 steps should be executed. Subsequently, it is
+                * possible to execute only steps 4 and 6..13, or to execute
+                * only steps 8..13.
+                *
+                * The remainder of this section covers these initialization
+                * steps in sequence.
+                */
+
+               /* Do the following init only once */
+               if (if_num != 0)
+                       goto not_if0;
+
+               /* Only for interface #0 ... */
+
+               /*
+                * 5.9.3 LMC Interface-Enable Initialization
+                *
+                * LMC interface-enable initialization (Step 3) must be
+                * performed after Step 2 for each chip reset and whenever
+                * the DDR clock speed changes. This step needs to be
+                * performed only once, not once per LMC. Perform the
+                * following three substeps for the LMC interface-enable
+                * initialization:
+                *
+                * 1. Without changing any other LMC2_DLL_CTL2 fields
+                * (LMC(0..3)_DLL_CTL2 should be at their reset values after
+                * Step 1), write LMC2_DLL_CTL2[INTF_EN] = 1 if four-LMC
+                * mode is desired.
+                *
+                * 2. Without changing any other LMC3_DLL_CTL2 fields, write
+                * LMC3_DLL_CTL2[INTF_EN] = 1 if four-LMC mode is desired.
+                *
+                * 3. Read LMC2_DLL_CTL2 and wait for the result.
+                *
+                * The LMC2_DLL_CTL2[INTF_EN] and LMC3_DLL_CTL2[INTF_EN]
+                * values should not be changed by software from this point.
+                */
+
+               for (i = 0; i < 4; ++i) {
+                       if ((if_mask & (1 << i)) == 0)
+                               continue;
+
+                       dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(i));
+
+                       dll_ctl2.cn78xx.byp_setting = 0;
+                       dll_ctl2.cn78xx.byp_sel = 0;
+                       dll_ctl2.cn78xx.quad_dll_ena = 0;
+                       dll_ctl2.cn78xx.dreset = 1;
+                       dll_ctl2.cn78xx.dll_bringup = 0;
+                       dll_ctl2.cn78xx.intf_en = 0;
+
+                       lmc_wr(priv, CVMX_LMCX_DLL_CTL2(i), dll_ctl2.u64);
+               }
+
+               /*
+                * ###### Interface enable (intf_en) deferred until after
+                * DDR_DIV_RESET=0 #######
+                */
+
+               /*
+                * 5.9.1 DDR PLL Initialization
+                *
+                * DDR PLL initialization (Step 1) must be performed for each
+                * chip reset and whenever the DDR clock speed changes. This
+                * step needs to be performed only once, not once per LMC.
+                *
+                * Perform the following eight substeps to initialize the
+                * DDR PLL:
+                *
+                * 1. If not done already, write all fields in
+                * LMC(0..3)_DDR_PLL_CTL and
+                * LMC(0..1)_DLL_CTL2 to their reset values, including:
+                *
+                * .. LMC0_DDR_PLL_CTL[DDR_DIV_RESET] = 1
+                * .. LMC0_DLL_CTL2[DRESET] = 1
+                *
+                * This substep is not necessary after a chip reset.
+                *
+                */
+
+               ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0));
+
+               ddr_pll_ctl.cn78xx.reset_n = 0;
+               ddr_pll_ctl.cn78xx.ddr_div_reset = 1;
+               ddr_pll_ctl.cn78xx.phy_dcok = 0;
+
+               /*
+                * 73XX pass 1.3 has LMC0 DCLK_INVERT tied to 1; earlier
+                * 73xx passes are tied to 0
+                *
+                * 75XX needs LMC0 DCLK_INVERT set to 1 to minimize duty
+                * cycle falling points
+                *
+                * for all other chips, LMC0 defaults to DCLK_INVERT=0
+                */
+               ddr_pll_ctl.cn78xx.dclk_invert =
+                   !!(octeon_is_cpuid(OCTEON_CN73XX_PASS1_3) ||
+                      octeon_is_cpuid(OCTEON_CNF75XX));
+
+               /*
+                * allow override of LMC0 desired setting for DCLK_INVERT,
+                * but not on 73XX;
+                * we cannot change LMC0 DCLK_INVERT on 73XX any pass
+                */
+               if (!(octeon_is_cpuid(OCTEON_CN73XX))) {
+                       s = lookup_env(priv, "ddr0_set_dclk_invert");
+                       if (s) {
+                               ddr_pll_ctl.cn78xx.dclk_invert =
+                                   !!simple_strtoul(s, NULL, 0);
+                               debug("LMC0: override DDR_PLL_CTL[dclk_invert] to %d\n",
+                                     ddr_pll_ctl.cn78xx.dclk_invert);
+                       }
+               }
+
+               lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(0), ddr_pll_ctl.u64);
+               debug("%-45s : 0x%016llx\n", "LMC0: DDR_PLL_CTL",
+                     ddr_pll_ctl.u64);
+
+               // only when LMC1 is active
+               if (if_mask & 0x2) {
+                       /*
+                        * For CNF75XX, both LMC0 and LMC1 use the same PLL,
+                        * so we use the LMC0 setting of DCLK_INVERT for LMC1.
+                        */
+                       if (!octeon_is_cpuid(OCTEON_CNF75XX)) {
+                               int override = 0;
+
+                               /*
+                                * by default, for non-CNF75XX, we want LMC1
+                                * DCLK_INVERT toggled relative to LMC0
+                                */
+                               int lmc0_dclk_invert =
+                                   ddr_pll_ctl.cn78xx.dclk_invert;
+
+                               /*
+                                * FIXME: the work-around for DDR3 UDIMM
+                                * problems is to use the LMC0 setting on LMC1;
+                                * also, on 73xx pass 1.3, default LMC1
+                                * DCLK_INVERT to the LMC0 value, not its inverse
+                                */
+                               int lmc1_dclk_invert;
+
+                               lmc1_dclk_invert =
+                                       ((ddr_type == DDR4_DRAM) &&
+                                        !octeon_is_cpuid(OCTEON_CN73XX_PASS1_3))
+                                       ? lmc0_dclk_invert ^ 1 :
+                                       lmc0_dclk_invert;
+
+                               /*
+                                * allow override of LMC1 desired setting for
+                                * DCLK_INVERT
+                                */
+                               s = lookup_env(priv, "ddr1_set_dclk_invert");
+                               if (s) {
+                                       lmc1_dclk_invert =
+                                               !!simple_strtoul(s, NULL, 0);
+                                       override = 1;
+                               }
+                               debug("LMC1: %s DDR_PLL_CTL[dclk_invert] to %d (LMC0 %d)\n",
+                                     (override) ? "override" :
+                                     "default", lmc1_dclk_invert,
+                                     lmc0_dclk_invert);
+
+                               ddr_pll_ctl.cn78xx.dclk_invert =
+                                       lmc1_dclk_invert;
+                       }
+
+                       // but always write LMC1 CSR if it is active
+                       lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(1), ddr_pll_ctl.u64);
+                       debug("%-45s : 0x%016llx\n",
+                             "LMC1: DDR_PLL_CTL", ddr_pll_ctl.u64);
+               }
+
+               /*
+                * 2. If the current DRAM contents are not preserved (see
+                * LMC(0..3)_RESET_CTL[DDR3PSV]), this is also an appropriate
+                * time to assert the RESET# pin of the DDR3/DDR4 DRAM parts.
+                * If desired, write
+                * LMC0_RESET_CTL[DDR3RST] = 0 without modifying any other
+                * LMC0_RESET_CTL fields to assert the DDR_RESET_L pin.
+                * No action is required here to assert DDR_RESET_L
+                * following a chip reset. Refer to Section 5.9.6. Do this
+                * for all enabled LMCs.
+                */
+
+               for (i = 0; (!ddr_memory_preserved(priv)) && i < 4; ++i) {
+                       union cvmx_lmcx_reset_ctl reset_ctl;
+
+                       if ((if_mask & (1 << i)) == 0)
+                               continue;
+
+                       reset_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RESET_CTL(i));
+                       reset_ctl.cn78xx.ddr3rst = 0;   /* Reset asserted */
+                       debug("LMC%d Asserting DDR_RESET_L\n", i);
+                       lmc_wr(priv, CVMX_LMCX_RESET_CTL(i), reset_ctl.u64);
+                       lmc_rd(priv, CVMX_LMCX_RESET_CTL(i));
+               }
+
+               /*
+                * 3. Without changing any other LMC0_DDR_PLL_CTL values,
+                * write LMC0_DDR_PLL_CTL[CLKF] with a value that gives a
+                * desired DDR PLL speed. The LMC0_DDR_PLL_CTL[CLKF] value
+                * should be selected in conjunction with the post-scalar
+                * divider values for LMC (LMC0_DDR_PLL_CTL[DDR_PS_EN]) so
+                * that the desired LMC CK speeds are produced (all
+                * enabled LMCs must run the same speed). Section 5.14
+                * describes LMC0_DDR_PLL_CTL[CLKF] and
+                * LMC0_DDR_PLL_CTL[DDR_PS_EN] programmings that produce
+                * the desired LMC CK speed. Section 5.9.2 describes LMC CK
+                * initialization, which can be done separately from the DDR
+                * PLL initialization described in this section.
+                *
+                * The LMC0_DDR_PLL_CTL[CLKF] value must not change after
+                * this point without restarting this SDRAM PLL
+                * initialization sequence.
+                */
+
+               /* Init to max error */
+               error = ddr_hertz;
+               best_error = ddr_hertz;
+
+               debug("DDR Reference Hertz = %d\n", ddr_ref_hertz);
+
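+               /*
+                * The search below sweeps CLKR and the _en[] post-scalar
+                * table and derives CLKF so that:
+                *
+                *   PLL Hz    = ddr_ref_hertz * (CLKF + 1) / (CLKR + 1)
+                *   LMC CK Hz = PLL Hz / _en[en_idx]
+                *
+                * Illustration only (ddr_ref_hertz and the _en[] entries
+                * are board/chip specific): with a 50 MHz reference, an
+                * 800 MHz target, CLKR = 0 and a divide-by-2 post-scalar
+                * entry, CLKF = nint(800 MHz * 1 * 2 / 50 MHz) - 1 = 31,
+                * i.e. a 1600 MHz PLL and exactly 800 MHz LMC CK
+                * (error = 0), subject to the min/max PLL limits checked
+                * below.
+                */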
+               while (best_error == ddr_hertz) {
+                       for (clkr = 0; clkr < 4; ++clkr) {
+                               for (en_idx =
+                                    sizeof(_en) / sizeof(int) -
+                                    1; en_idx >= 0; --en_idx) {
+                                       save_en_idx = en_idx;
+                                       clkf =
+                                           ((ddr_hertz) *
+                                            (clkr + 1) * (_en[save_en_idx]));
+                                       clkf = divide_nint(clkf, ddr_ref_hertz)
+                                           - 1;
+                                       pll_MHz =
+                                           ddr_ref_hertz *
+                                           (clkf + 1) / (clkr + 1) / 1000000;
+                                       calculated_ddr_hertz =
+                                           ddr_ref_hertz *
+                                           (clkf +
+                                            1) / ((clkr +
+                                                   1) * (_en[save_en_idx]));
+                                       error =
+                                           ddr_hertz - calculated_ddr_hertz;
+
+                                       if (pll_MHz < min_pll_MHz ||
+                                           pll_MHz > max_pll_MHz)
+                                               continue;
+                                       if (clkf > max_clkf) {
+                                               /*
+                                                * PLL requires clkf to be
+                                                * limited
+                                                */
+                                               continue;
+                                       }
+                                       if (abs(error) > abs(best_error))
+                                               continue;
+
+                                       debug("clkr: %2llu, en[%d]: %2d, clkf: %4llu, pll_MHz: %4llu, ddr_hertz: %8llu, error: %8lld\n",
+                                             clkr, save_en_idx,
+                                             _en[save_en_idx], clkf, pll_MHz,
+                                            calculated_ddr_hertz, error);
+
+                                       /* Favor the highest PLL frequency. */
+                                       if (abs(error) < abs(best_error) ||
+                                           pll_MHz > best_pll_MHz) {
+                                               best_pll_MHz = pll_MHz;
+                                               best_calculated_ddr_hertz =
+                                                       calculated_ddr_hertz;
+                                               best_error = error;
+                                               best_clkr = clkr;
+                                               best_clkf = clkf;
+                                               best_en_idx = save_en_idx;
+                                       }
+                               }
+                       }
+
+                       override_pll_settings = 0;
+
+                       s = lookup_env(priv, "ddr_pll_clkr");
+                       if (s) {
+                               best_clkr = simple_strtoul(s, NULL, 0);
+                               override_pll_settings = 1;
+                       }
+
+                       s = lookup_env(priv, "ddr_pll_clkf");
+                       if (s) {
+                               best_clkf = simple_strtoul(s, NULL, 0);
+                               override_pll_settings = 1;
+                       }
+
+                       s = lookup_env(priv, "ddr_pll_en_idx");
+                       if (s) {
+                               best_en_idx = simple_strtoul(s, NULL, 0);
+                               override_pll_settings = 1;
+                       }
+
+                       if (override_pll_settings) {
+                               best_pll_MHz =
+                                   ddr_ref_hertz * (best_clkf +
+                                                    1) /
+                                   (best_clkr + 1) / 1000000;
+                               best_calculated_ddr_hertz =
+                                   ddr_ref_hertz * (best_clkf +
+                                                    1) /
+                                   ((best_clkr + 1) * (_en[best_en_idx]));
+                               best_error =
+                                   ddr_hertz - best_calculated_ddr_hertz;
+                       }
+
+                       debug("clkr: %2llu, en[%d]: %2d, clkf: %4llu, pll_MHz: %4llu, ddr_hertz: %8llu, error: %8lld <==\n",
+                             best_clkr, best_en_idx, _en[best_en_idx],
+                             best_clkf, best_pll_MHz,
+                             best_calculated_ddr_hertz, best_error);
+
+                       /*
+                        * Try lowering the frequency if we can't get a
+                        * working configuration
+                        */
+                       if (best_error == ddr_hertz) {
+                               if (ddr_hertz < orig_ddr_hertz - 10000000)
+                                       break;
+                               ddr_hertz -= 1000000;
+                               best_error = ddr_hertz;
+                       }
+               }
+
+               if (best_error == ddr_hertz) {
+                       printf("ERROR: Can not compute a legal DDR clock speed configuration.\n");
+                       return -1;
+               }
+
+               new_bwadj = (best_clkf + 1) / 10;
+               debug("bwadj: %2d\n", new_bwadj);
+
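+               /*
+                * The bandwidth-adjust value computed above follows the
+                * (CLKF + 1) / 10 rule, e.g. CLKF = 79 yields BWADJ = 8;
+                * the "ddr_pll_bwadj" environment setting below overrides
+                * it if present.
+                */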
+               s = lookup_env(priv, "ddr_pll_bwadj");
+               if (s) {
+                       new_bwadj = strtoul(s, NULL, 0);
+                       debug("bwadj: %2d\n", new_bwadj);
+               }
+
+               for (i = 0; i < 2; ++i) {
+                       if ((if_mask & (1 << i)) == 0)
+                               continue;
+
+                       ddr_pll_ctl.u64 =
+                           lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i));
+                       debug("LMC%d: DDR_PLL_CTL                             : 0x%016llx\n",
+                             i, ddr_pll_ctl.u64);
+
+                       ddr_pll_ctl.cn78xx.ddr_ps_en = best_en_idx;
+                       ddr_pll_ctl.cn78xx.clkf = best_clkf;
+                       ddr_pll_ctl.cn78xx.clkr = best_clkr;
+                       ddr_pll_ctl.cn78xx.reset_n = 0;
+                       ddr_pll_ctl.cn78xx.bwadj = new_bwadj;
+
+                       lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64);
+                       debug("LMC%d: DDR_PLL_CTL                             : 0x%016llx\n",
+                             i, ddr_pll_ctl.u64);
+
+                       /*
+                        * For cnf75xx LMC0 and LMC1 use the same PLL so
+                        * only program LMC0 PLL.
+                        */
+                       if (octeon_is_cpuid(OCTEON_CNF75XX))
+                               break;
+               }
+
+               for (i = 0; i < 4; ++i) {
+                       if ((if_mask & (1 << i)) == 0)
+                               continue;
+
+                       /*
+                        * 4. Read LMC0_DDR_PLL_CTL and wait for the result.
+                        */
+
+                       lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i));
+
+                       /*
+                        * 5. Wait a minimum of 3 us.
+                        */
+
+                       udelay(3);      /* Wait 3 us */
+
+                       /*
+                        * 6. Write LMC0_DDR_PLL_CTL[RESET_N] = 1 without
+                        * changing any other LMC0_DDR_PLL_CTL values.
+                        */
+
+                       ddr_pll_ctl.u64 =
+                           lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i));
+                       ddr_pll_ctl.cn78xx.reset_n = 1;
+                       lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64);
+
+                       /*
+                        * 7. Read LMC0_DDR_PLL_CTL and wait for the result.
+                        */
+
+                       lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i));
+
+                       /*
+                        * 8. Wait a minimum of 25 us.
+                        */
+
+                       udelay(25);     /* Wait 25 us */
+
+                       /*
+                        * For cnf75xx LMC0 and LMC1 use the same PLL so
+                        * only program LMC0 PLL.
+                        */
+                       if (octeon_is_cpuid(OCTEON_CNF75XX))
+                               break;
+               }
+
+               for (i = 0; i < 4; ++i) {
+                       if ((if_mask & (1 << i)) == 0)
+                               continue;
+
+                       /*
+                        * 5.9.2 LMC CK Initialization
+                        *
+                        * DDR PLL initialization must be completed prior to
+                        * starting LMC CK initialization.
+                        *
+                        * Perform the following substeps to initialize the
+                        * LMC CK:
+                        *
+                        * 1. Without changing any other LMC(0..3)_DDR_PLL_CTL
+                        * values, write
+                        * LMC(0..3)_DDR_PLL_CTL[DDR_DIV_RESET] = 1 and
+                        * LMC(0..3)_DDR_PLL_CTL[DDR_PS_EN] with the
+                        * appropriate value to get the desired LMC CK speed.
+                        * Section 5.14 discusses CLKF and DDR_PS_EN
+                        * programmings.  The LMC(0..3)_DDR_PLL_CTL[DDR_PS_EN]
+                        * must not change after this point without restarting
+                        * this LMC CK initialization sequence.
+                        */
+
+                       ddr_pll_ctl.u64 = lmc_rd(priv,
+                                                CVMX_LMCX_DDR_PLL_CTL(i));
+                       ddr_pll_ctl.cn78xx.ddr_div_reset = 1;
+                       lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64);
+
+                       /*
+                        * 2. Without changing any other fields in
+                        * LMC(0..3)_DDR_PLL_CTL, write
+                        * LMC(0..3)_DDR_PLL_CTL[DDR4_MODE] = 0.
+                        */
+
+                       ddr_pll_ctl.u64 =
+                           lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i));
+                       ddr_pll_ctl.cn78xx.ddr4_mode =
+                           (ddr_type == DDR4_DRAM) ? 1 : 0;
+                       lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64);
+
+                       /*
+                        * 3. Read LMC(0..3)_DDR_PLL_CTL and wait for the
+                        * result.
+                        */
+
+                       lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i));
+
+                       /*
+                        * 4. Wait a minimum of 1 us.
+                        */
+
+                       udelay(1);      /* Wait 1 us */
+
+                       /*
+                        * ###### Steps 5 through 7 deferred until after
+                        * DDR_DIV_RESET=0 #######
+                        */
+
+                       /*
+                        * 8. Without changing any other LMC(0..3)_COMP_CTL2
+                        * values, write
+                        * LMC(0..3)_COMP_CTL2[CK_CTL,CONTROL_CTL,CMD_CTL]
+                        * to the desired DDR*_CK_*_P control and command
+                        * signals drive strength.
+                        */
+
+                       union cvmx_lmcx_comp_ctl2 comp_ctl2;
+                       const struct ddr3_custom_config *custom_lmc_config =
+                           &ddr_conf->custom_lmc_config;
+
+                       comp_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(i));
+
+                       /* Default 4=34.3 ohm */
+                       comp_ctl2.cn78xx.dqx_ctl =
+                           (custom_lmc_config->dqx_ctl ==
+                            0) ? 4 : custom_lmc_config->dqx_ctl;
+                       /* Default 4=34.3 ohm */
+                       comp_ctl2.cn78xx.ck_ctl =
+                           (custom_lmc_config->ck_ctl ==
+                            0) ? 4 : custom_lmc_config->ck_ctl;
+                       /* Default 4=34.3 ohm */
+                       comp_ctl2.cn78xx.cmd_ctl =
+                           (custom_lmc_config->cmd_ctl ==
+                            0) ? 4 : custom_lmc_config->cmd_ctl;
+
+                       comp_ctl2.cn78xx.rodt_ctl = 0x4;        /* 60 ohm */
+
+                       comp_ctl2.cn70xx.ptune_offset =
+                           (abs(custom_lmc_config->ptune_offset) & 0x7)
+                           | (_sign(custom_lmc_config->ptune_offset) << 3);
+                       comp_ctl2.cn70xx.ntune_offset =
+                           (abs(custom_lmc_config->ntune_offset) & 0x7)
+                           | (_sign(custom_lmc_config->ntune_offset) << 3);
+
+                       s = lookup_env(priv, "ddr_clk_ctl");
+                       if (s) {
+                               comp_ctl2.cn78xx.ck_ctl =
+                                   simple_strtoul(s, NULL, 0);
+                       }
+
+                       s = lookup_env(priv, "ddr_ck_ctl");
+                       if (s) {
+                               comp_ctl2.cn78xx.ck_ctl =
+                                   simple_strtoul(s, NULL, 0);
+                       }
+
+                       s = lookup_env(priv, "ddr_cmd_ctl");
+                       if (s) {
+                               comp_ctl2.cn78xx.cmd_ctl =
+                                   simple_strtoul(s, NULL, 0);
+                       }
+
+                       s = lookup_env(priv, "ddr_dqx_ctl");
+                       if (s) {
+                               comp_ctl2.cn78xx.dqx_ctl =
+                                   simple_strtoul(s, NULL, 0);
+                       }
+
+                       s = lookup_env(priv, "ddr_ptune_offset");
+                       if (s) {
+                               comp_ctl2.cn78xx.ptune_offset =
+                                   simple_strtoul(s, NULL, 0);
+                       }
+
+                       s = lookup_env(priv, "ddr_ntune_offset");
+                       if (s) {
+                               comp_ctl2.cn78xx.ntune_offset =
+                                   simple_strtoul(s, NULL, 0);
+                       }
+
+                       lmc_wr(priv, CVMX_LMCX_COMP_CTL2(i), comp_ctl2.u64);
+
+                       /*
+                        * 9. Read LMC(0..3)_DDR_PLL_CTL and wait for the
+                        * result.
+                        */
+
+                       lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i));
+
+                       /*
+                        * 10. Wait a minimum of 200 ns.
+                        */
+
+                       udelay(1);      /* Wait 1 us */
+
+                       /*
+                        * 11. Without changing any other
+                        * LMC(0..3)_DDR_PLL_CTL values, write
+                        * LMC(0..3)_DDR_PLL_CTL[DDR_DIV_RESET] = 0.
+                        */
+
+                       ddr_pll_ctl.u64 = lmc_rd(priv,
+                                                CVMX_LMCX_DDR_PLL_CTL(i));
+                       ddr_pll_ctl.cn78xx.ddr_div_reset = 0;
+                       lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64);
+
+                       /*
+                        * 12. Read LMC(0..3)_DDR_PLL_CTL and wait for the
+                        * result.
+                        */
+
+                       lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i));
+
+                       /*
+                        * 13. Wait a minimum of 200 ns.
+                        */
+
+                       udelay(1);      /* Wait 1 us */
+               }
+
+               /*
+                * Relocated Interface Enable (intf_en) Step
+                */
+               for (i = (octeon_is_cpuid(OCTEON_CN73XX) ||
+                         octeon_is_cpuid(OCTEON_CNF75XX)) ? 1 : 2;
+                    i < 4; ++i) {
+                       /*
+                        * This step is only necessary for the upper LMCs
+                        * (LMC1 on CN73XX/CNF75XX, LMC2 and LMC3 otherwise
+                        * in 4-LMC mode). The mask causes the unpopulated
+                        * interfaces to be skipped.
+                        */
+                       if ((if_mask & (1 << i)) == 0)
+                               continue;
+
+                       dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(i));
+                       dll_ctl2.cn78xx.intf_en = 1;
+                       lmc_wr(priv, CVMX_LMCX_DLL_CTL2(i), dll_ctl2.u64);
+                       lmc_rd(priv, CVMX_LMCX_DLL_CTL2(i));
+               }
+
+               /*
+                * Relocated PHY_DCOK Step
+                */
+               for (i = 0; i < 4; ++i) {
+                       if ((if_mask & (1 << i)) == 0)
+                               continue;
+                       /*
+                        * 5. Without changing any other fields in
+                        * LMC(0..3)_DDR_PLL_CTL, write
+                        * LMC(0..3)_DDR_PLL_CTL[PHY_DCOK] = 1.
+                        */
+
+                       ddr_pll_ctl.u64 = lmc_rd(priv,
+                                                CVMX_LMCX_DDR_PLL_CTL(i));
+                       ddr_pll_ctl.cn78xx.phy_dcok = 1;
+                       lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64);
+                       /*
+                        * 6. Read LMC(0..3)_DDR_PLL_CTL and wait for
+                        * the result.
+                        */
+
+                       lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i));
+
+                       /*
+                        * 7. Wait a minimum of 20 us.
+                        */
+
+                       udelay(20);     /* Wait 20 us */
+               }
+
+               /*
+                * 5.9.4 LMC DRESET Initialization
+                *
+                * All of the DDR PLL, LMC global CK, and LMC interface
+                * enable initializations must be completed prior to starting
+                * this LMC DRESET initialization (Step 4).
+                *
+                * This LMC DRESET step is done for all enabled LMCs.
+                *
+                * There are special constraints on the ordering of DRESET
+                * initialization (Steps 4) and CK local initialization
+                * (Step 5) whenever CK local initialization must be executed.
+                * CK local initialization must be executed whenever the DDR
+                * PLL is being brought up (for each chip reset and whenever
+                * the DDR clock speed changes).
+                *
+                * When Step 5 must be executed in the two-LMC mode case:
+                * - LMC0 DRESET initialization must occur before Step 5.
+                * - LMC1 DRESET initialization must occur after Step 5.
+                *
+                * When Step 5 must be executed in the four-LMC mode case:
+                * - LMC2 and LMC3 DRESET initialization must occur before
+                *   Step 5.
+                * - LMC0 and LMC1 DRESET initialization must occur after
+                *   Step 5.
+                */
+
+               if (octeon_is_cpuid(OCTEON_CN73XX)) {
+                       /* ONE-LMC or TWO-LMC MODE BEFORE STEP 5 for cn73xx */
+                       cn78xx_lmc_dreset_init(priv, 0);
+               } else if (octeon_is_cpuid(OCTEON_CNF75XX)) {
+                       if (if_mask == 0x3) {
+                               /*
+                                * 2-LMC Mode: LMC1 DRESET must occur
+                                * before Step 5
+                                */
+                               cn78xx_lmc_dreset_init(priv, 1);
+                       }
+               } else {
+                       /* TWO-LMC MODE DRESET BEFORE STEP 5 */
+                       if (if_mask == 0x3)
+                               cn78xx_lmc_dreset_init(priv, 0);
+
+                       /* FOUR-LMC MODE BEFORE STEP 5 */
+                       if (if_mask == 0xf) {
+                               cn78xx_lmc_dreset_init(priv, 2);
+                               cn78xx_lmc_dreset_init(priv, 3);
+                       }
+               }
+
+               /*
+                * 5.9.5 LMC CK Local Initialization
+                *
+                * All of DDR PLL, LMC global CK, and LMC interface-enable
+                * initializations must be completed prior to starting this
+                * LMC CK local initialization (Step 5).
+                *
+                * LMC CK Local initialization must be performed for each
+                * chip reset and whenever the DDR clock speed changes. This
+                * step needs to be performed only once, not once per LMC.
+                *
+                * There are special constraints on the ordering of DRESET
+                * initialization (Steps 4) and CK local initialization
+                * (Step 5) whenever CK local initialization must be executed.
+                * CK local initialization must be executed whenever the
+                * DDR PLL is being brought up (for each chip reset and
+                * whenever the DDR clock speed changes).
+                *
+                * When Step 5 must be executed in the two-LMC mode case:
+                * - LMC0 DRESET initialization must occur before Step 5.
+                * - LMC1 DRESET initialization must occur after Step 5.
+                *
+                * When Step 5 must be executed in the four-LMC mode case:
+                * - LMC2 and LMC3 DRESET initialization must occur before
+                *   Step 5.
+                * - LMC0 and LMC1 DRESET initialization must occur after
+                *   Step 5.
+                *
+                * LMC CK local initialization is different depending on
+                * whether two-LMC or four-LMC modes are desired.
+                */
+
+               if (if_mask == 0x3) {
+                       int temp_lmc_if_num = octeon_is_cpuid(OCTEON_CNF75XX) ?
+                               1 : 0;
+
+                       /*
+                        * 5.9.5.1 LMC CK Local Initialization for Two-LMC
+                        * Mode
+                        *
+                        * 1. Write LMC0_DLL_CTL3 to its reset value. (Note
+                        * that LMC0_DLL_CTL3[DLL90_BYTE_SEL] = 0x2 .. 0x8
+                        * should also work.)
+                        */
+
+                       ddr_dll_ctl3.u64 = 0;
+                       ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 1;
+
+                       if (octeon_is_cpuid(OCTEON_CNF75XX))
+                               ddr_dll_ctl3.cn78xx.dll90_byte_sel = 7;
+                       else
+                               ddr_dll_ctl3.cn78xx.dll90_byte_sel = 1;
+
+                       lmc_wr(priv,
+                              CVMX_LMCX_DLL_CTL3(temp_lmc_if_num),
+                              ddr_dll_ctl3.u64);
+
+                       /*
+                        * 2. Read LMC0_DLL_CTL3 and wait for the result.
+                        */
+
+                       lmc_rd(priv, CVMX_LMCX_DLL_CTL3(temp_lmc_if_num));
+
+                       /*
+                        * 3. Without changing any other fields in
+                        * LMC0_DLL_CTL3, write
+                        * LMC0_DLL_CTL3[DCLK90_FWD] = 1.  Writing
+                        * LMC0_DLL_CTL3[DCLK90_FWD] = 1
+                        * causes clock-delay information to be forwarded
+                        * from LMC0 to LMC1.
+                        */
+
+                       ddr_dll_ctl3.cn78xx.dclk90_fwd = 1;
+                       lmc_wr(priv,
+                              CVMX_LMCX_DLL_CTL3(temp_lmc_if_num),
+                              ddr_dll_ctl3.u64);
+
+                       /*
+                        * 4. Read LMC0_DLL_CTL3 and wait for the result.
+                        */
+
+                       lmc_rd(priv, CVMX_LMCX_DLL_CTL3(temp_lmc_if_num));
+               }
+
+               if (if_mask == 0xf) {
+                       /*
+                        * 5.9.5.2 LMC CK Local Initialization for Four-LMC
+                        * Mode
+                        *
+                        * 1. Write LMC2_DLL_CTL3 to its reset value except
+                        * LMC2_DLL_CTL3[DLL90_BYTE_SEL] = 0x7.
+                        */
+
+                       ddr_dll_ctl3.u64 = 0;
+                       ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 1;
+                       ddr_dll_ctl3.cn78xx.dll90_byte_sel = 7;
+                       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(2), ddr_dll_ctl3.u64);
+
+                       /*
+                        * 2. Write LMC3_DLL_CTL3 to its reset value except
+                        * LMC3_DLL_CTL3[DLL90_BYTE_SEL] = 0x2.
+                        */
+
+                       ddr_dll_ctl3.u64 = 0;
+                       ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 1;
+                       ddr_dll_ctl3.cn78xx.dll90_byte_sel = 2;
+                       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(3), ddr_dll_ctl3.u64);
+
+                       /*
+                        * 3. Read LMC3_DLL_CTL3 and wait for the result.
+                        */
+
+                       lmc_rd(priv, CVMX_LMCX_DLL_CTL3(3));
+
+                       /*
+                        * 4. Without changing any other fields in
+                        * LMC2_DLL_CTL3, write LMC2_DLL_CTL3[DCLK90_FWD] = 1
+                        * and LMC2_DLL_CTL3[DCLK90_RECAL_DIS] = 1.
+                        * Writing LMC2_DLL_CTL3[DCLK90_FWD] = 1 causes LMC2
+                        * to forward clock-delay information to LMC0. Setting
+                        * LMC2_DLL_CTL3[DCLK90_RECAL_DIS] to 1 prevents LMC2
+                        * from periodically recalibrating this delay
+                        * information.
+                        */
+
+                       ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(2));
+                       ddr_dll_ctl3.cn78xx.dclk90_fwd = 1;
+                       ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 1;
+                       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(2), ddr_dll_ctl3.u64);
+
+                       /*
+                        * 5. Without changing any other fields in
+                        * LMC3_DLL_CTL3, write LMC3_DLL_CTL3[DCLK90_FWD] = 1
+                        * and LMC3_DLL_CTL3[DCLK90_RECAL_DIS] = 1.
+                        * Writing LMC3_DLL_CTL3[DCLK90_FWD] = 1 causes LMC3
+                        * to forward clock-delay information to LMC1. Setting
+                        * LMC3_DLL_CTL3[DCLK90_RECAL_DIS] to 1 prevents LMC3
+                        * from periodically recalibrating this delay
+                        * information.
+                        */
+
+                       ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(3));
+                       ddr_dll_ctl3.cn78xx.dclk90_fwd = 1;
+                       ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 1;
+                       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(3), ddr_dll_ctl3.u64);
+
+                       /*
+                        * 6. Read LMC3_DLL_CTL3 and wait for the result.
+                        */
+
+                       lmc_rd(priv, CVMX_LMCX_DLL_CTL3(3));
+               }
+
+               if (octeon_is_cpuid(OCTEON_CNF75XX)) {
+                       /*
+                        * cnf75xx 2-LMC mode: LMC0 DRESET must occur after
+                        * Step 5; do LMC0 for 1-LMC mode here too
+                        */
+                       cn78xx_lmc_dreset_init(priv, 0);
+               }
+
+               /* TWO-LMC MODE AFTER STEP 5 */
+               if (if_mask == 0x3) {
+                       if (octeon_is_cpuid(OCTEON_CNF75XX)) {
+                               /*
+                                * cnf75xx 2-LMC Mode: LMC0 DRESET must
+                                * occur after Step 5
+                                */
+                               cn78xx_lmc_dreset_init(priv, 0);
+                       } else {
+                               cn78xx_lmc_dreset_init(priv, 1);
+                       }
+               }
+
+               /* FOUR-LMC MODE AFTER STEP 5 */
+               if (if_mask == 0xf) {
+                       cn78xx_lmc_dreset_init(priv, 0);
+                       cn78xx_lmc_dreset_init(priv, 1);
+
+                       /*
+                        * Enable periodic recalibration of the DDR90
+                        * delay line.
+                        */
+                       ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(0));
+                       ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 0;
+                       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(0), ddr_dll_ctl3.u64);
+                       ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(1));
+                       ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 0;
+                       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(1), ddr_dll_ctl3.u64);
+               }
+
+               /* Enable fine tune mode for all LMCs */
+               for (i = 0; i < 4; ++i) {
+                       if ((if_mask & (1 << i)) == 0)
+                               continue;
+                       ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(i));
+                       ddr_dll_ctl3.cn78xx.fine_tune_mode = 1;
+                       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(i), ddr_dll_ctl3.u64);
+               }
+
+               /*
+                * Enable the trim circuit on the appropriate channels to
+                * adjust the DDR clock duty cycle for chips that support
+                * it
+                */
+               if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
+                   octeon_is_cpuid(OCTEON_CN73XX) ||
+                   octeon_is_cpuid(OCTEON_CNF75XX)) {
+                       union cvmx_lmcx_phy_ctl lmc_phy_ctl;
+                       int i;
+
+                       for (i = 0; i < 4; ++i) {
+                               if ((if_mask & (1 << i)) == 0)
+                                       continue;
+
+                               lmc_phy_ctl.u64 =
+                                   lmc_rd(priv, CVMX_LMCX_PHY_CTL(i));
+
+                               if (octeon_is_cpuid(OCTEON_CNF75XX) ||
+                                   octeon_is_cpuid(OCTEON_CN73XX_PASS1_3)) {
+                                       /* Both LMCs */
+                                       lmc_phy_ctl.s.lv_mode = 0;
+                               } else {
+                                       /* Odd LMCs = 0, Even LMCs = 1 */
+                                       lmc_phy_ctl.s.lv_mode = (~i) & 1;
+                               }
+
+                               debug("LMC%d: PHY_CTL                                 : 0x%016llx\n",
+                                     i, lmc_phy_ctl.u64);
+                               lmc_wr(priv, CVMX_LMCX_PHY_CTL(i),
+                                      lmc_phy_ctl.u64);
+                       }
+               }
+       }
+
+       /*
+        * 5.9.6 LMC RESET Initialization
+        *
+        * NOTE: this is now done as the first step in
+        * init_octeon3_ddr3_interface, rather than the last step in clock
+        * init. This reorg allows restarting per-LMC initialization should
+        * problems be encountered, rather than being forced to resort to
+        * resetting the chip and starting all over.
+        *
+        * Look for the code in octeon3_lmc.c: perform_lmc_reset().
+        */
+
+       /* Fallthrough for all interfaces... */
+not_if0:
+
+       /*
+        * Start the DDR clock so that its frequency can be measured.
+        * For some chips we must activate the memory controller with
+        * init_start to make the DDR clock start to run.
+        */
+       if ((!octeon_is_cpuid(OCTEON_CN6XXX)) &&
+           (!octeon_is_cpuid(OCTEON_CNF7XXX)) &&
+           (!octeon_is_cpuid(OCTEON_CN7XXX))) {
+               union cvmx_lmcx_mem_cfg0 mem_cfg0;
+
+               mem_cfg0.u64 = 0;
+               mem_cfg0.s.init_start = 1;
+               lmc_wr(priv, CVMX_LMCX_MEM_CFG0(if_num), mem_cfg0.u64);
+               lmc_rd(priv, CVMX_LMCX_MEM_CFG0(if_num));
+       }
+
+       set_ddr_clock_initialized(priv, if_num, 1);
+
+       return 0;
+}
+
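+/*
+ * Busy-wait for a given number of clock cycles by polling the free-running
+ * CVMX_IPD_CLK_COUNT register; the _o3 variant below polls
+ * CVMX_FPA_CLK_COUNT instead, which is the counter used on Octeon 3.
+ */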
+static void octeon_ipd_delay_cycles(u64 cycles)
+{
+       u64 start = csr_rd(CVMX_IPD_CLK_COUNT);
+
+       while (start + cycles > csr_rd(CVMX_IPD_CLK_COUNT))
+               ;
+}
+
+static void octeon_ipd_delay_cycles_o3(u64 cycles)
+{
+       u64 start = csr_rd(CVMX_FPA_CLK_COUNT);
+
+       while (start + cycles > csr_rd(CVMX_FPA_CLK_COUNT))
+               ;
+}
+
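+/*
+ * Measure the actual DDR clock by sampling the LMC DCLK counter and a
+ * free-running reference counter (IPD or FPA clock count) over the same
+ * interval and scaling by the known reference clock rate:
+ *
+ *   ddr_hz = ddr_clock_delta * ref_hz / ref_clock_delta
+ *
+ * Illustration only (numbers are made up): if the reference runs at
+ * 800 MHz and the DCLK counter advances 1.5 times as fast as the
+ * reference counter over the sample window, the measured DDR clock is
+ * about 1200 MHz.
+ */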
+static u32 measure_octeon_ddr_clock(struct ddr_priv *priv,
+                                   struct ddr_conf *ddr_conf, u32 cpu_hertz,
+                                   u32 ddr_hertz, u32 ddr_ref_hertz,
+                                   int if_num, u32 if_mask)
+{
+       u64 core_clocks;
+       u64 ddr_clocks;
+       u64 calc_ddr_hertz;
+
+       if (ddr_conf) {
+               if (initialize_ddr_clock(priv, ddr_conf, cpu_hertz,
+                                        ddr_hertz, ddr_ref_hertz, if_num,
+                                        if_mask) != 0)
+                       return 0;
+       }
+
+       /* Dynamically determine the DDR clock speed */
+       if (OCTEON_IS_OCTEON2() || octeon_is_cpuid(OCTEON_CN70XX)) {
+               core_clocks = csr_rd(CVMX_IPD_CLK_COUNT);
+               ddr_clocks = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num));
+               /* How many cpu cycles to measure over */
+               octeon_ipd_delay_cycles(100000000);
+               core_clocks = csr_rd(CVMX_IPD_CLK_COUNT) - core_clocks;
+               ddr_clocks =
+                   lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num)) - ddr_clocks;
+               calc_ddr_hertz = ddr_clocks * gd->bus_clk / core_clocks;
+       } else if (octeon_is_cpuid(OCTEON_CN7XXX)) {
+               core_clocks = csr_rd(CVMX_FPA_CLK_COUNT);
+               ddr_clocks = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num));
+               /* How many cpu cycles to measure over */
+               octeon_ipd_delay_cycles_o3(100000000);
+               core_clocks = csr_rd(CVMX_FPA_CLK_COUNT) - core_clocks;
+               ddr_clocks =
+                   lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num)) - ddr_clocks;
+               calc_ddr_hertz = ddr_clocks * gd->bus_clk / core_clocks;
+       } else {
+               core_clocks = csr_rd(CVMX_IPD_CLK_COUNT);
+               /*
+                * ignore overflow, starts counting when we enable the
+                * controller
+                */
+               ddr_clocks = lmc_rd(priv, CVMX_LMCX_DCLK_CNT_LO(if_num));
+               /* How many cpu cycles to measure over */
+               octeon_ipd_delay_cycles(100000000);
+               core_clocks = csr_rd(CVMX_IPD_CLK_COUNT) - core_clocks;
+               ddr_clocks =
+                   lmc_rd(priv, CVMX_LMCX_DCLK_CNT_LO(if_num)) - ddr_clocks;
+               calc_ddr_hertz = ddr_clocks * cpu_hertz / core_clocks;
+       }
+
+       debug("core clocks: %llu, ddr clocks: %llu, calc rate: %llu\n",
+             core_clocks, ddr_clocks, calc_ddr_hertz);
+       debug("LMC%d: Measured DDR clock: %lld, cpu clock: %u, ddr clocks: %llu\n",
+             if_num, calc_ddr_hertz, cpu_hertz, ddr_clocks);
+
+       /* Check for unreasonable settings. */
+       if (calc_ddr_hertz < 10000) {
+               udelay(8000000 * 100);
+               printf("DDR clock misconfigured on interface %d. Resetting...\n",
+                      if_num);
+               do_reset(NULL, 0, 0, NULL);
+       }
+
+       return calc_ddr_hertz;
+}
+
+u64 lmc_ddr3_rl_dbg_read(struct ddr_priv *priv, int if_num, int idx)
+{
+       union cvmx_lmcx_rlevel_dbg rlevel_dbg;
+       union cvmx_lmcx_rlevel_ctl rlevel_ctl;
+
+       rlevel_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num));
+       rlevel_ctl.s.byte = idx;
+
+       lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64);
+       lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num));
+
+       rlevel_dbg.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_DBG(if_num));
+       return rlevel_dbg.s.bitmask;
+}
+
+u64 lmc_ddr3_wl_dbg_read(struct ddr_priv *priv, int if_num, int idx)
+{
+       union cvmx_lmcx_wlevel_dbg wlevel_dbg;
+
+       wlevel_dbg.u64 = 0;
+       wlevel_dbg.s.byte = idx;
+
+       lmc_wr(priv, CVMX_LMCX_WLEVEL_DBG(if_num), wlevel_dbg.u64);
+       lmc_rd(priv, CVMX_LMCX_WLEVEL_DBG(if_num));
+
+       wlevel_dbg.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_DBG(if_num));
+       return wlevel_dbg.s.bitmask;
+}
+
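+/*
+ * Score a read-leveling feedback bitmask.  A clean result is a single
+ * contiguous run of 1s; blank masks, bubbles (0s inside the run), narrow
+ * runs, trailing 1s and over-long runs (counted for DDR4 only) each add
+ * a weighted error term.  Illustration only (assuming MASKRANGE spans at
+ * least four bits): bitmask 0x3c (binary 0011 1100) has firstbit = 2,
+ * lastbit = 5, and the widest contiguous mask found is width = 4 at
+ * mstart = 2, with no bubbles or trailing bits, so it scores 0 errors.
+ */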
+int validate_ddr3_rlevel_bitmask(struct rlevel_bitmask *rlevel_bitmask_p,
+                                int ddr_type)
+{
+       int i;
+       int errors = 0;
+       u64 mask = 0;           /* Used in 64-bit comparisons */
+       u8 mstart = 0;
+       u8 width = 0;
+       u8 firstbit = 0;
+       u8 lastbit = 0;
+       u8 bubble = 0;
+       u8 tbubble = 0;
+       u8 blank = 0;
+       u8 narrow = 0;
+       u8 trailing = 0;
+       u64 bitmask = rlevel_bitmask_p->bm;
+       u8 extras = 0;
+       u8 toolong = 0;
+       u64 temp;
+
+       if (bitmask == 0) {
+               blank += RLEVEL_BITMASK_BLANK_ERROR;
+       } else {
+               /* Look for fb, the first bit */
+               temp = bitmask;
+               while (!(temp & 1)) {
+                       firstbit++;
+                       temp >>= 1;
+               }
+
+               /* Look for lb, the last bit */
+               lastbit = firstbit;
+               while ((temp >>= 1))
+                       lastbit++;
+
+               /*
+                * Start with the max range to try to find the largest mask
+                * within the bitmask data
+                */
+               width = MASKRANGE_BITS;
+               for (mask = MASKRANGE; mask > 0; mask >>= 1, --width) {
+                       for (mstart = lastbit - width + 1; mstart >= firstbit;
+                            --mstart) {
+                               temp = mask << mstart;
+                               if ((bitmask & temp) == temp)
+                                       goto done_now;
+                       }
+               }
+done_now:
+               /* look for any more contiguous 1's to the right of mstart */
+               if (width == MASKRANGE_BITS) {  // only when maximum mask
+                       while ((bitmask >> (mstart - 1)) & 1) {
+                               // slide right over more 1's
+                               --mstart;
+                               // count the number of extra bits only for DDR4
+                               if (ddr_type == DDR4_DRAM)
+                                       extras++;
+                       }
+               }
+
+               /* Penalize any extra 1's beyond the maximum desired mask */
+               if (extras > 0)
+                       toolong =
+                           RLEVEL_BITMASK_TOOLONG_ERROR * ((1 << extras) - 1);
+
+               /* Detect if bitmask is too narrow. */
+               if (width < 4)
+                       narrow = (4 - width) * RLEVEL_BITMASK_NARROW_ERROR;
+
+               /*
+                * detect leading bubble bits, that is, any 0's between
+                * firstbit and mstart
+                */
+               temp = bitmask >> (firstbit + 1);
+               i = mstart - firstbit - 1;
+               while (--i >= 0) {
+                       if ((temp & 1) == 0)
+                               bubble += RLEVEL_BITMASK_BUBBLE_BITS_ERROR;
+                       temp >>= 1;
+               }
+
+               temp = bitmask >> (mstart + width + extras);
+               i = lastbit - (mstart + width + extras - 1);
+               while (--i >= 0) {
+                       if (temp & 1) {
+                               /*
+                                * Detect 1 bits after the trailing end of
+                                * the mask, including last.
+                                */
+                               trailing += RLEVEL_BITMASK_TRAILING_BITS_ERROR;
+                       } else {
+                               /*
+                                * Detect trailing bubble bits, that is,
+                                * any 0's between end-of-mask and last
+                                */
+                               tbubble += RLEVEL_BITMASK_BUBBLE_BITS_ERROR;
+                       }
+                       temp >>= 1;
+               }
+       }
+
+       errors = bubble + tbubble + blank + narrow + trailing + toolong;
+
+       /* Pass out useful statistics */
+       rlevel_bitmask_p->mstart = mstart;
+       rlevel_bitmask_p->width = width;
+
+       debug_bitmask_print("bm:%08lx mask:%02lx, width:%2u, mstart:%2d, fb:%2u, lb:%2u (bu:%2d, tb:%2d, bl:%2d, n:%2d, t:%2d, x:%2d) errors:%3d %s\n",
+                           (unsigned long)bitmask, mask, width, mstart,
+                           firstbit, lastbit, bubble, tbubble, blank,
+                           narrow, trailing, toolong, errors,
+                           (errors) ? "=> invalid" : "");
+
+       return errors;
+}
+
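+/*
+ * Convert a validated read-leveling window into a delay setting.  By
+ * default the delay is the (rounded-down) center of the passing window;
+ * with OFFSET_EN set it is taken OFFSET steps back from the upper edge,
+ * but never below the lower edge.  Illustration only: mstart = 3,
+ * width = 7 gives a default delay of (7 - 1) / 2 + 3 = 6, while
+ * OFFSET_EN with OFFSET = 2 gives max(3, 3 + 7 - 1 - 2) = 7.
+ */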
+int compute_ddr3_rlevel_delay(u8 mstart, u8 width,
+                             union cvmx_lmcx_rlevel_ctl rlevel_ctl)
+{
+       int delay;
+
+       debug_bitmask_print("  offset_en:%d", rlevel_ctl.s.offset_en);
+
+       if (rlevel_ctl.s.offset_en) {
+               delay = max((int)mstart,
+                           (int)(mstart + width - 1 - rlevel_ctl.s.offset));
+       } else {
+               /* if (rlevel_ctl.s.offset) { *//* Experimental */
+               if (0) {
+                       delay = max(mstart + rlevel_ctl.s.offset, mstart + 1);
+                       /*
+                        * Ensure that the offset delay falls within the
+                        * bitmask
+                        */
+                       delay = min(delay, mstart + width - 1);
+               } else {
+                       /* Round down */
+                       delay = (width - 1) / 2 + mstart;
+               }
+       }
+
+       return delay;
+}
+
+/* Default ODT config must disable ODT */
+/* Must be const (read only) so that the structure is in flash */
+const struct dimm_odt_config disable_odt_config[] = {
+       /*   1 */ { 0, 0x0000, {.u64 = 0x0000}, {.u64 = 0x0000}, 0, 0x0000, 0 },
+       /*   2 */ { 0, 0x0000, {.u64 = 0x0000}, {.u64 = 0x0000}, 0, 0x0000, 0 },
+       /*   3 */ { 0, 0x0000, {.u64 = 0x0000}, {.u64 = 0x0000}, 0, 0x0000, 0 },
+       /*   4 */ { 0, 0x0000, {.u64 = 0x0000}, {.u64 = 0x0000}, 0, 0x0000, 0 },
+};
+
+/* Memory controller setup function */
+static int init_octeon_dram_interface(struct ddr_priv *priv,
+                                     struct ddr_conf *ddr_conf,
+                                     u32 ddr_hertz, u32 cpu_hertz,
+                                     u32 ddr_ref_hertz, int if_num,
+                                     u32 if_mask)
+{
+       u32 mem_size_mbytes = 0;
+       char *s;
+
+       s = lookup_env(priv, "ddr_timing_hertz");
+       if (s)
+               ddr_hertz = simple_strtoul(s, NULL, 0);
+
+       if (OCTEON_IS_OCTEON3()) {
+               int lmc_restart_retries = 0;
+#define DEFAULT_RESTART_RETRIES 3
+               int lmc_restart_retries_limit = DEFAULT_RESTART_RETRIES;
+
+               s = lookup_env(priv, "ddr_restart_retries_limit");
+               if (s)
+                       lmc_restart_retries_limit = simple_strtoul(s, NULL, 0);
+
+restart_lmc_init:
+               mem_size_mbytes = init_octeon3_ddr3_interface(priv, ddr_conf,
+                                                             ddr_hertz,
+                                                             cpu_hertz,
+                                                             ddr_ref_hertz,
+                                                             if_num, if_mask);
+               if (mem_size_mbytes == 0) {     // 0 means restart is possible
+                       if (lmc_restart_retries < lmc_restart_retries_limit) {
+                               lmc_restart_retries++;
+                               printf("N0.LMC%d Configuration problem: attempting LMC reset and init restart %d\n",
+                                      if_num, lmc_restart_retries);
+                               goto restart_lmc_init;
+                       } else {
+                               if (lmc_restart_retries_limit > 0) {
+                                       printf("INFO: N0.LMC%d Configuration: fatal problem remains after %d LMC init retries - Resetting node...\n",
+                                              if_num, lmc_restart_retries);
+                                       mdelay(500);
+                                       do_reset(NULL, 0, 0, NULL);
+                               } else {
+                                       // return an error, no restart
+                                       mem_size_mbytes = -1;
+                               }
+                       }
+               }
+       }
+
+       debug("N0.LMC%d Configuration Completed: %d MB\n",
+             if_num, mem_size_mbytes);
+
+       return mem_size_mbytes;
+}
+
+#define WLEVEL_BYTE_BITS       5
+#define WLEVEL_BYTE_MSK                ((1ULL << 5) - 1)
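+/*
+ * LMC(0..3)_WLEVEL_RANKX packs one 5-bit write-leveling delay per byte
+ * lane, so byte lane N occupies bits [5 * N + 4 : 5 * N]; e.g. byte 3
+ * lives in bits [19:15].  The helpers below read or update a single lane
+ * without disturbing the others.
+ */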
+
+void upd_wl_rank(union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank,
+                int byte, int delay)
+{
+       union cvmx_lmcx_wlevel_rankx temp_wlevel_rank;
+
+       if (byte >= 0 && byte <= 8) {
+               temp_wlevel_rank.u64 = lmc_wlevel_rank->u64;
+               temp_wlevel_rank.u64 &=
+                   ~(WLEVEL_BYTE_MSK << (WLEVEL_BYTE_BITS * byte));
+               temp_wlevel_rank.u64 |=
+                   ((delay & WLEVEL_BYTE_MSK) << (WLEVEL_BYTE_BITS * byte));
+               lmc_wlevel_rank->u64 = temp_wlevel_rank.u64;
+       }
+}
+
+int get_wl_rank(union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank, int byte)
+{
+       int delay = 0;
+
+       if (byte >= 0 && byte <= 8)
+               delay =
+                   ((lmc_wlevel_rank->u64) >> (WLEVEL_BYTE_BITS *
+                                               byte)) & WLEVEL_BYTE_MSK;
+
+       return delay;
+}
+
+void upd_rl_rank(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank,
+                int byte, int delay)
+{
+       union cvmx_lmcx_rlevel_rankx temp_rlevel_rank;
+
+       if (byte >= 0 && byte <= 8) {
+               temp_rlevel_rank.u64 =
+                   lmc_rlevel_rank->u64 & ~(RLEVEL_BYTE_MSK <<
+                                            (RLEVEL_BYTE_BITS * byte));
+               temp_rlevel_rank.u64 |=
+                   ((delay & RLEVEL_BYTE_MSK) << (RLEVEL_BYTE_BITS * byte));
+               lmc_rlevel_rank->u64 = temp_rlevel_rank.u64;
+       }
+}
+
+int get_rl_rank(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank, int byte)
+{
+       int delay = 0;
+
+       if (byte >= 0 && byte <= 8)
+               delay =
+                   ((lmc_rlevel_rank->u64) >> (RLEVEL_BYTE_BITS *
+                                               byte)) & RLEVEL_BYTE_MSK;
+
+       return delay;
+}
+
+void rlevel_to_wlevel(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank,
+                     union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank, int byte)
+{
+       int byte_delay = get_rl_rank(lmc_rlevel_rank, byte);
+
+       debug("Estimating Wlevel delay byte %d: ", byte);
+       debug("Rlevel=%d => ", byte_delay);
+       byte_delay = divide_roundup(byte_delay, 2) & 0x1e;
+       debug("Wlevel=%d\n", byte_delay);
+       upd_wl_rank(lmc_wlevel_rank, byte, byte_delay);
+}
+
+/* Delay trend: constant=0, decreasing=-1, increasing=1 */
+static s64 calc_delay_trend(s64 v)
+{
+       if (v == 0)
+               return 0;
+       if (v < 0)
+               return -1;
+
+       return 1;
+}
+
+/*
+ * Evaluate delay sequence across the whole range of byte delays while
+ * keeping track of the overall delay trend, increasing or decreasing.
+ * If the trend changes, charge an error amount to the score.
+ */
+
+// NOTE: "max_adj_delay_inc" argument is, by default, 1 for DDR3 and 2 for DDR4
+
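+/*
+ * Illustration only: for byte delays 10, 11, 12, 10 the per-step trend
+ * is +1, +1, -1, so the last transition reverses direction and is
+ * charged RLEVEL_NONSEQUENTIAL_DELAY_ERROR; with max_adj_delay_inc = 1
+ * (the DDR3 default) the 12 -> 10 step also exceeds the allowed
+ * increment by 1 and adds one RLEVEL_ADJACENT_DELAY_ERROR on top.
+ */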
+int nonseq_del(struct rlevel_byte_data *rlevel_byte, int start, int end,
+              int max_adj_delay_inc)
+{
+       s64 error = 0;
+       s64 delay_trend, prev_trend = 0;
+       int byte_idx;
+       s64 seq_err;
+       s64 adj_err;
+       s64 delay_inc;
+       s64 delay_diff;
+
+       for (byte_idx = start; byte_idx < end; ++byte_idx) {
+               delay_diff = rlevel_byte[byte_idx + 1].delay -
+                       rlevel_byte[byte_idx].delay;
+               delay_trend = calc_delay_trend(delay_diff);
+
+               /*
+                * Increment error each time the trend changes to the
+                * opposite direction.
+                */
+               if (prev_trend != 0 && delay_trend != 0 &&
+                   prev_trend != delay_trend) {
+                       seq_err = RLEVEL_NONSEQUENTIAL_DELAY_ERROR;
+               } else {
+                       seq_err = 0;
+               }
+
+               // how big was the delay change, if any
+               delay_inc = abs(delay_diff);
+
+               /*
+                * Even if the trend did not change to the opposite direction,
+                * check for the magnitude of the change, and scale the
+                * penalty by the amount that the size is larger than the
+                * provided limit.
+                */
+               if (max_adj_delay_inc != 0 && delay_inc > max_adj_delay_inc) {
+                       adj_err = (delay_inc - max_adj_delay_inc) *
+                               RLEVEL_ADJACENT_DELAY_ERROR;
+               } else {
+                       adj_err = 0;
+               }
+
+               rlevel_byte[byte_idx + 1].sqerrs = seq_err + adj_err;
+               error += seq_err + adj_err;
+
+               debug_bitmask_print("Byte %d: %d, Byte %d: %d, delay_trend: %ld, prev_trend: %ld, [%ld/%ld]%s%s\n",
+                                   byte_idx + 0,
+                                   rlevel_byte[byte_idx + 0].delay,
+                                   byte_idx + 1,
+                                   rlevel_byte[byte_idx + 1].delay,
+                                   delay_trend,
+                                   prev_trend, seq_err, adj_err,
+                                   (seq_err) ?
+                                   " => Nonsequential byte delay" : "",
+                                   (adj_err) ?
+                                   " => Adjacent delay error" : "");
+
+               if (delay_trend != 0)
+                       prev_trend = delay_trend;
+       }
+
+       return (int)error;
+}
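+
+/*
+ * Worked scoring example (a sketch, with max_adj_delay_inc = 1, the DDR3
+ * default): byte delays {5, 6, 8, 7} are scored as
+ *
+ *   5 -> 6: diff +1, first trend seen, no penalty
+ *   6 -> 8: diff +2, same trend, but the step exceeds the limit by 1,
+ *           so adj_err = 1 * RLEVEL_ADJACENT_DELAY_ERROR
+ *   8 -> 7: diff -1, trend reverses,
+ *           so seq_err = RLEVEL_NONSEQUENTIAL_DELAY_ERROR
+ *
+ * and nonseq_del() returns the sum of those two penalties.
+ */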
+
+int roundup_ddr3_wlevel_bitmask(int bitmask)
+{
+       int shifted_bitmask;
+       int leader;
+       int delay;
+
+       for (leader = 0; leader < 8; ++leader) {
+               shifted_bitmask = (bitmask >> leader);
+               if ((shifted_bitmask & 1) == 0)
+                       break;
+       }
+
+       for (; leader < 16; ++leader) {
+               shifted_bitmask = (bitmask >> (leader % 8));
+               if (shifted_bitmask & 1)
+                       break;
+       }
+
+       delay = (leader & 1) ? leader + 1 : leader;
+       delay = delay % 8;
+
+       return delay;
+}
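+
+/*
+ * Example (a sketch): for a write-leveling response bitmask of 0x78
+ * (bits 3..6 set) the first loop stops at bit 0, which is already clear,
+ * the second loop finds the first set bit at position 3, and the odd
+ * position is rounded up to the even delay 4.  The initial scan over set
+ * low-order bits only matters when the set run wraps around bit 0.
+ */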
+
+/* Octeon 2 */
+static void oct2_ddr3_seq(struct ddr_priv *priv, int rank_mask, int if_num,
+                         int sequence)
+{
+       char *s;
+
+#ifdef DEBUG_PERFORM_DDR3_SEQUENCE
+       static const char * const sequence_str[] = {
+               "power-up/init",
+               "read-leveling",
+               "self-refresh entry",
+               "self-refresh exit",
+               "precharge power-down entry",
+               "precharge power-down exit",
+               "write-leveling",
+               "illegal"
+       };
+#endif
+
+       union cvmx_lmcx_control lmc_control;
+       union cvmx_lmcx_config lmc_config;
+       int save_ddr2t;
+
+       lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
+       save_ddr2t = lmc_control.s.ddr2t;
+
+       if (save_ddr2t == 0 && octeon_is_cpuid(OCTEON_CN63XX_PASS1_X)) {
+               /* Some register parts (IDT and TI included) do not like
+                * the sequence that LMC generates for an MRS register
+                * write in 1T mode. In this case, the register part does
+                * not properly forward the MRS register write to the DRAM
+                * parts.  See errata (LMC-14548) Issues with registered
+                * DIMMs.
+                */
+               debug("Forcing DDR 2T during init seq. Re: Pass 1 LMC-14548\n");
+               lmc_control.s.ddr2t = 1;
+       }
+
+       s = lookup_env(priv, "ddr_init_2t");
+       if (s)
+               lmc_control.s.ddr2t = simple_strtoul(s, NULL, 0);
+
+       lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), lmc_control.u64);
+
+       lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+
+       lmc_config.s.init_start = 1;
+       if (OCTEON_IS_OCTEON2())
+               lmc_config.cn63xx.sequence = sequence;
+       lmc_config.s.rankmask = rank_mask;
+
+#ifdef DEBUG_PERFORM_DDR3_SEQUENCE
+       debug("Performing LMC sequence: rank_mask=0x%02x, sequence=%d, %s\n",
+             rank_mask, sequence, sequence_str[sequence]);
+#endif
+
+       lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
+       lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
+       udelay(600);            /* Wait a while */
+
+       lmc_control.s.ddr2t = save_ddr2t;
+       lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), lmc_control.u64);
+       lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
+}
+
+/* Check to see if any custom offset values are used */
+static int is_dll_offset_provided(const int8_t *dll_offset_table)
+{
+       int i;
+
+       if (!dll_offset_table)  /* Check for pointer to table. */
+               return 0;
+
+       for (i = 0; i < 9; ++i) {
+               if (dll_offset_table[i] != 0)
+                       return 1;
+       }
+
+       return 0;
+}
+
+void change_dll_offset_enable(struct ddr_priv *priv, int if_num, int change)
+{
+       union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
+
+       ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
+       SET_DDR_DLL_CTL3(offset_ena, !!change);
+       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
+       ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
+}
+
+unsigned short load_dll_offset(struct ddr_priv *priv, int if_num,
+                              int dll_offset_mode, int byte_offset, int byte)
+{
+       union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
+       int field_width = 6;
+       /*
+        * byte_sel:
+        * 0x1 = byte 0, ..., 0x9 = byte 8
+        * 0xA = all bytes
+        */
+       int byte_sel = (byte == 10) ? byte : byte + 1;
+
+       if (octeon_is_cpuid(OCTEON_CN6XXX))
+               field_width = 5;
+
+       ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
+       SET_DDR_DLL_CTL3(load_offset, 0);
+       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
+       ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
+
+       SET_DDR_DLL_CTL3(mode_sel, dll_offset_mode);
+       SET_DDR_DLL_CTL3(offset,
+                        (abs(byte_offset) & (~(-1 << field_width))) |
+                        (_sign(byte_offset) << field_width));
+       SET_DDR_DLL_CTL3(byte_sel, byte_sel);
+       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
+       ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
+
+       SET_DDR_DLL_CTL3(load_offset, 1);
+       lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
+       ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
+
+       return (unsigned short)GET_DDR_DLL_CTL3(offset);
+}
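+
+/*
+ * Offset encoding example (a sketch, assuming _sign() returns 1 for
+ * negative values): with the 6-bit field width used on Octeon 3 parts, a
+ * byte_offset of -5 is written as (5 & 0x3f) | (1 << 6) = 0x45, i.e.
+ * sign-magnitude with the sign bit directly above the magnitude field.
+ */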
+
+void process_custom_dll_offsets(struct ddr_priv *priv, int if_num,
+                               const char *enable_str,
+                               const int8_t *offsets, const char *byte_str,
+                               int mode)
+{
+       const char *s;
+       int enabled;
+       int provided;
+       int byte_offset;
+       unsigned short offset[9] = { 0 };
+       int byte;
+
+       s = lookup_env(priv, enable_str);
+       if (s)
+               enabled = !!simple_strtol(s, NULL, 0);
+       else
+               enabled = -1;
+
+       /*
+        * enabled == -1: no override, do only configured offsets if provided
+        * enabled ==  0: override OFF, do NOT do it even if configured
+        *                offsets provided
+        * enabled ==  1: override ON, do it for overrides plus configured
+        *                offsets
+        */
+
+       if (enabled == 0)
+               return;
+
+       provided = is_dll_offset_provided(offsets);
+
+       if (enabled < 0 && !provided)
+               return;
+
+       change_dll_offset_enable(priv, if_num, 0);
+
+       for (byte = 0; byte < 9; ++byte) {
+               // always take the provided, if available
+               byte_offset = (provided) ? offsets[byte] : 0;
+
+               // then, if enabled, use any overrides present
+               if (enabled > 0) {
+                       s = lookup_env(priv, byte_str, if_num, byte);
+                       if (s)
+                               byte_offset = simple_strtol(s, NULL, 0);
+               }
+
+               offset[byte] =
+                   load_dll_offset(priv, if_num, mode, byte_offset, byte);
+       }
+
+       change_dll_offset_enable(priv, if_num, 1);
+
+       debug("N0.LMC%d: DLL %s Offset 8:0       :  0x%02x  0x%02x  0x%02x  0x%02x  0x%02x  0x%02x  0x%02x  0x%02x  0x%02x\n",
+             if_num, (mode == 2) ? "Read " : "Write",
+             offset[8], offset[7], offset[6], offset[5], offset[4],
+             offset[3], offset[2], offset[1], offset[0]);
+}
+
+void ddr_init_seq(struct ddr_priv *priv, int rank_mask, int if_num)
+{
+       char *s;
+       int ddr_init_loops = 1;
+       int rankx;
+
+       s = lookup_env(priv, "ddr%d_init_loops", if_num);
+       if (s)
+               ddr_init_loops = simple_strtoul(s, NULL, 0);
+
+       while (ddr_init_loops--) {
+               for (rankx = 0; rankx < 8; rankx++) {
+                       if (!(rank_mask & (1 << rankx)))
+                               continue;
+
+                       if (OCTEON_IS_OCTEON3()) {
+                               /* power-up/init */
+                               oct3_ddr3_seq(priv, 1 << rankx, if_num, 0);
+                       } else {
+                               /* power-up/init */
+                               oct2_ddr3_seq(priv, 1 << rankx, if_num, 0);
+                       }
+
+                       udelay(1000);   /* Wait a while. */
+
+                       s = lookup_env(priv, "ddr_sequence1");
+                       if (s) {
+                               int sequence1;
+
+                               sequence1 = simple_strtoul(s, NULL, 0);
+
+                               if (OCTEON_IS_OCTEON3()) {
+                                       oct3_ddr3_seq(priv, 1 << rankx,
+                                                     if_num, sequence1);
+                               } else {
+                                       oct2_ddr3_seq(priv, 1 << rankx,
+                                                     if_num, sequence1);
+                               }
+                       }
+
+                       s = lookup_env(priv, "ddr_sequence2");
+                       if (s) {
+                               int sequence2;
+
+                               sequence2 = simple_strtoul(s, NULL, 0);
+
+                               if (OCTEON_IS_OCTEON3())
+                                       oct3_ddr3_seq(priv, 1 << rankx,
+                                                     if_num, sequence2);
+                               else
+                                       oct2_ddr3_seq(priv, 1 << rankx,
+                                                     if_num, sequence2);
+                       }
+               }
+       }
+}
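+
+/*
+ * Usage sketch (assuming lookup_env() expands "ddr%d_init_loops" with the
+ * interface number): setting ddr0_init_loops to 2 in the environment
+ * repeats the per-rank power-up/init sequence twice on LMC0, and setting
+ * ddr_sequence1/ddr_sequence2 runs those extra LMC sequence codes after
+ * each power-up/init pass.
+ */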
+
+static int octeon_ddr_initialize(struct ddr_priv *priv, u32 cpu_hertz,
+                                u32 ddr_hertz, u32 ddr_ref_hertz,
+                                u32 if_mask,
+                                struct ddr_conf *ddr_conf,
+                                u32 *measured_ddr_hertz)
+{
+       u32 ddr_conf_valid_mask = 0;
+       int memsize_mbytes = 0;
+       char *eptr;
+       int if_idx;
+       u32 ddr_max_speed = 667000000;
+       u32 calc_ddr_hertz = -1;
+       int val;
+       int ret;
+
+       if (env_get("ddr_verbose") || env_get("ddr_prompt"))
+               priv->flags |= FLAG_DDR_VERBOSE;
+
+#ifdef DDR_VERBOSE
+       priv->flags |= FLAG_DDR_VERBOSE;
+#endif
+
+       if (env_get("ddr_trace_init")) {
+               printf("Parameter ddr_trace_init found in environment.\n");
+               priv->flags |= FLAG_DDR_TRACE_INIT;
+               priv->flags |= FLAG_DDR_VERBOSE;
+       }
+
+       priv->flags |= FLAG_DDR_DEBUG;
+
+       val = env_get_ulong("ddr_debug", 10, (u32)-1);
+       switch (val) {
+       case 0:
+               priv->flags &= ~FLAG_DDR_DEBUG;
+               printf("Parameter ddr_debug cleared in environment\n");
+               break;
+       case (u32)-1:
+               break;
+       default:
+               printf("Parameter ddr_debug set in environment\n");
+               priv->flags |= FLAG_DDR_DEBUG;
+               priv->flags |= FLAG_DDR_VERBOSE;
+               break;
+       }
+       if (env_get("ddr_prompt"))
+               priv->flags |= FLAG_DDR_PROMPT;
+
+       /* Force ddr_verbose for failsafe debugger */
+       if (priv->flags & FLAG_FAILSAFE_MODE)
+               priv->flags |= FLAG_DDR_VERBOSE;
+
+#ifdef DDR_DEBUG
+       priv->flags |= FLAG_DDR_DEBUG;
+       /* Keep verbose on while we are still debugging. */
+       priv->flags |= FLAG_DDR_VERBOSE;
+#endif
+
+       if ((octeon_is_cpuid(OCTEON_CN61XX) ||
+            octeon_is_cpuid(OCTEON_CNF71XX)) && ddr_max_speed > 533333333) {
+               ddr_max_speed = 533333333;
+       } else if (octeon_is_cpuid(OCTEON_CN7XXX)) {
+               /* Override speed restrictions to support internal testing. */
+               ddr_max_speed = 1210000000;
+       }
+
+       if (ddr_hertz > ddr_max_speed) {
+               printf("DDR clock speed %u exceeds maximum supported DDR speed, reducing to %uHz\n",
+                      ddr_hertz, ddr_max_speed);
+               ddr_hertz = ddr_max_speed;
+       }
+
+       if (OCTEON_IS_OCTEON3()) {      // restrict this check to Octeon 3
+               if (ddr_hertz > cpu_hertz) {
+                       printf("\nFATAL ERROR: DDR speed %u exceeds CPU speed %u, exiting...\n\n",
+                              ddr_hertz, cpu_hertz);
+                       return -1;
+               }
+       }
+
+       /* Enable L2 ECC unless disable_l2_ecc is set in the environment */
+       eptr = env_get("disable_l2_ecc");
+       if (eptr) {
+               printf("Disabling L2 ECC based on disable_l2_ecc environment variable\n");
+               union cvmx_l2c_ctl l2c_val;
+
+               l2c_val.u64 = l2c_rd(priv, CVMX_L2C_CTL);
+               l2c_val.s.disecc = 1;
+               l2c_wr(priv, CVMX_L2C_CTL, l2c_val.u64);
+       } else {
+               union cvmx_l2c_ctl l2c_val;
+
+               l2c_val.u64 = l2c_rd(priv, CVMX_L2C_CTL);
+               l2c_val.s.disecc = 0;
+               l2c_wr(priv, CVMX_L2C_CTL, l2c_val.u64);
+       }
+
+       /*
+        * Init the L2C, must be done before DRAM access so that we
+        * know L2 is empty
+        */
+       eptr = env_get("disable_l2_index_aliasing");
+       if (eptr) {
+               union cvmx_l2c_ctl l2c_val;
+
+               puts("L2 index aliasing disabled.\n");
+
+               l2c_val.u64 = l2c_rd(priv, CVMX_L2C_CTL);
+               l2c_val.s.disidxalias = 1;
+               l2c_wr(priv, CVMX_L2C_CTL, l2c_val.u64);
+       } else {
+               union cvmx_l2c_ctl l2c_val;
+
+               /* Enable L2C index aliasing */
+
+               l2c_val.u64 = l2c_rd(priv, CVMX_L2C_CTL);
+               l2c_val.s.disidxalias = 0;
+               l2c_wr(priv, CVMX_L2C_CTL, l2c_val.u64);
+       }
+
+       if (OCTEON_IS_OCTEON3()) {
+               /*
+                * rdf_cnt: Defines the sample point of the LMC response data in
+                * the DDR-clock/core-clock crossing.  For optimal
+                * performance set to 10 * (DDR-clock period/core-clock
+                * period) - 1.  To disable set to 0. All other values
+                * are reserved.
+                */
+
+               union cvmx_l2c_ctl l2c_ctl;
+               u64 rdf_cnt;
+               char *s;
+
+               l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL);
+
+               /*
+                * It is more convenient to compute the ratio using clock
+                * frequencies rather than clock periods.
+                */
+               rdf_cnt = (((u64)10 * cpu_hertz) / ddr_hertz) - 1;
+               rdf_cnt = rdf_cnt < 256 ? rdf_cnt : 255;
+               l2c_ctl.cn78xx.rdf_cnt = rdf_cnt;
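+               /*
+                * Example (a sketch): with a 1200 MHz core clock and an
+                * 800 MHz DDR clock this evaluates to (10 * 1200 / 800) - 1
+                * = 14, well below the clamp of 255.
+                */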
+
+               s = lookup_env(priv, "early_fill_count");
+               if (s)
+                       l2c_ctl.cn78xx.rdf_cnt = simple_strtoul(s, NULL, 0);
+
+               debug("%-45s : %d, cpu_hertz:%d, ddr_hertz:%d\n",
+                     "EARLY FILL COUNT  ", l2c_ctl.cn78xx.rdf_cnt, cpu_hertz,
+                     ddr_hertz);
+               l2c_wr(priv, CVMX_L2C_CTL, l2c_ctl.u64);
+       }
+
+       /* Check for lower DIMM socket populated */
+       for (if_idx = 0; if_idx < 4; ++if_idx) {
+               if ((if_mask & (1 << if_idx)) &&
+                   validate_dimm(priv,
+                                 &ddr_conf[(int)if_idx].dimm_config_table[0],
+                                 0))
+                       ddr_conf_valid_mask |= (1 << if_idx);
+       }
+
+       if (octeon_is_cpuid(OCTEON_CN68XX) || octeon_is_cpuid(OCTEON_CN78XX)) {
+               int four_lmc_mode = 1;
+               char *s;
+
+               if (priv->flags & FLAG_FAILSAFE_MODE)
+                       four_lmc_mode = 0;
+
+               /* Disable four-LMC mode on pass 1.0 parts.
+                * See errata (LMC-15811).
+                */
+               if (octeon_is_cpuid(OCTEON_CN68XX_PASS1_0))
+                       four_lmc_mode = 0;
+
+               s = env_get("ddr_four_lmc");
+               if (s) {
+                       four_lmc_mode = simple_strtoul(s, NULL, 0);
+                       printf("Parameter found in environment. ddr_four_lmc = %d\n",
+                              four_lmc_mode);
+               }
+
+               if (!four_lmc_mode) {
+                       puts("Forcing two-LMC Mode.\n");
+                       /* Invalidate LMC[2:3] */
+                       ddr_conf_valid_mask &= ~(3 << 2);
+               }
+       } else if (octeon_is_cpuid(OCTEON_CN73XX)) {
+               int one_lmc_mode = 0;
+               char *s;
+
+               s = env_get("ddr_one_lmc");
+               if (s) {
+                       one_lmc_mode = simple_strtoul(s, NULL, 0);
+                       printf("Parameter found in environment. ddr_one_lmc = %d\n",
+                              one_lmc_mode);
+               }
+
+               if (one_lmc_mode) {
+                       puts("Forcing one-LMC Mode.\n");
+                       /* Invalidate LMC[1:3] */
+                       ddr_conf_valid_mask &= ~(1 << 1);
+               }
+       }
+
+       if (!ddr_conf_valid_mask) {
+               printf("ERROR: No valid DIMMs detected on any DDR interface.\n");
+               hang();
+               return -1;      /* not reached: hang() does not return */
+       }
+
+       /*
+        * We measure the DDR frequency by counting DDR clocks.  We can
+        * confirm or adjust the expected frequency as necessary.  We use
+        * the measured frequency to make accurate timing calculations
+        * used to configure the controller.
+        */
+       for (if_idx = 0; if_idx < 4; ++if_idx) {
+               u32 tmp_hertz;
+
+               if (!(ddr_conf_valid_mask & (1 << if_idx)))
+                       continue;
+
+try_again:
+               /*
+                * only check for alternate refclk wanted on chips that
+                * support it
+                */
+               if ((octeon_is_cpuid(OCTEON_CN73XX)) ||
+                   (octeon_is_cpuid(OCTEON_CNF75XX)) ||
+                   (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X))) {
+                               // only need to do this if we are LMC0
+                       if (if_idx == 0) {
+                               union cvmx_lmcx_ddr_pll_ctl ddr_pll_ctl;
+
+                               ddr_pll_ctl.u64 =
+                                   lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0));
+
+                               /*
+                                * If we are asking for 100 MHz refclk, we can
+                                * only get it via alternate, so switch to it
+                                */
+                               if (ddr_ref_hertz == 100000000) {
+                                       ddr_pll_ctl.cn78xx.dclk_alt_refclk_sel =
+                                           1;
+                                       lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(0),
+                                              ddr_pll_ctl.u64);
+                                       udelay(1000);   // wait 1 msec
+                               } else {
+                                       /*
+                                        * If we are NOT asking for 100MHz,
+                                        * then reset to (assumed) 50MHz and go
+                                        * on
+                                        */
+                                       ddr_pll_ctl.cn78xx.dclk_alt_refclk_sel =
+                                           0;
+                                       lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(0),
+                                              ddr_pll_ctl.u64);
+                                       udelay(1000);   // wait 1 msec
+                               }
+                       }
+               } else {
+                       if (ddr_ref_hertz == 100000000) {
+                               debug("N0: DRAM init: requested 100 MHz refclk NOT SUPPORTED\n");
+                               ddr_ref_hertz = CONFIG_REF_HERTZ;
+                       }
+               }
+
+               tmp_hertz = measure_octeon_ddr_clock(priv, &ddr_conf[if_idx],
+                                                    cpu_hertz, ddr_hertz,
+                                                    ddr_ref_hertz, if_idx,
+                                                    ddr_conf_valid_mask);
+
+               /*
+                * only check for alternate refclk acquired on chips that
+                * support it
+                */
+               if ((octeon_is_cpuid(OCTEON_CN73XX)) ||
+                   (octeon_is_cpuid(OCTEON_CNF75XX)) ||
+                   (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X))) {
+                       /*
+                        * if we are LMC0 and we are asked for 100 MHz refclk,
+                        * we must be sure it is available
+                        * If not, we print an error message, set to 50MHz,
+                        * and go on...
+                        */
+                       if (if_idx == 0 && ddr_ref_hertz == 100000000) {
+                               /*
+                                * Validate that the clock returned is close
+                                * enough to the clock desired
+                                */
+                               // FIXME: is 5% close enough?
+                               int hertz_diff =
+                                   abs((int)tmp_hertz - (int)ddr_hertz);
+                               if (hertz_diff > ((int)ddr_hertz * 5 / 100)) {
+                                       // nope, diff is greater than 5%
+                                       debug("N0: DRAM init: requested 100 MHz refclk NOT FOUND\n");
+                                       ddr_ref_hertz = CONFIG_REF_HERTZ;
+                                       // clear the flag before trying again!!
+                                       set_ddr_clock_initialized(priv, 0, 0);
+                                       goto try_again;
+                               } else {
+                                       debug("N0: DRAM Init: requested 100 MHz refclk FOUND and SELECTED\n");
+                               }
+                       }
+               }
+
+               if (tmp_hertz > 0)
+                       calc_ddr_hertz = tmp_hertz;
+               debug("LMC%d: measured speed: %u hz\n", if_idx, tmp_hertz);
+       }
+
+       if (measured_ddr_hertz)
+               *measured_ddr_hertz = calc_ddr_hertz;
+
+       memsize_mbytes = 0;
+       for (if_idx = 0; if_idx < 4; ++if_idx) {
+               if (!(ddr_conf_valid_mask & (1 << if_idx)))
+                       continue;
+
+               ret = init_octeon_dram_interface(priv, &ddr_conf[if_idx],
+                                                calc_ddr_hertz,
+                                                cpu_hertz, ddr_ref_hertz,
+                                                if_idx, ddr_conf_valid_mask);
+               if (ret > 0)
+                       memsize_mbytes += ret;
+       }
+
+       if (memsize_mbytes == 0)
+               /* All interfaces failed to initialize, so return error */
+               return -1;
+
+       /*
+        * switch over to DBI mode only for chips that support it, and
+        * enabled by envvar
+        */
+       if ((octeon_is_cpuid(OCTEON_CN73XX)) ||
+           (octeon_is_cpuid(OCTEON_CNF75XX)) ||
+           (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X))) {
+               eptr = env_get("ddr_dbi_switchover");
+               if (eptr) {
+                       printf("DBI Switchover starting...\n");
+                       cvmx_dbi_switchover(priv);
+                       printf("DBI Switchover finished.\n");
+               }
+       }
+
+       /* call HW-assist tuning here on chips that support it */
+       if ((octeon_is_cpuid(OCTEON_CN73XX)) ||
+           (octeon_is_cpuid(OCTEON_CNF75XX)) ||
+           (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)))
+               cvmx_maybe_tune_node(priv, calc_ddr_hertz);
+
+       eptr = env_get("limit_dram_mbytes");
+       if (eptr) {
+               unsigned int mbytes = simple_strtoul(eptr, NULL, 10);
+
+               if (mbytes > 0) {
+                       memsize_mbytes = mbytes;
+                       printf("Limiting DRAM size to %d MBytes based on limit_dram_mbytes env. variable\n",
+                              mbytes);
+               }
+       }
+
+       debug("LMC Initialization complete. Total DRAM %d MB\n",
+             memsize_mbytes);
+
+       return memsize_mbytes;
+}
+
+static int octeon_ddr_probe(struct udevice *dev)
+{
+       struct ddr_priv *priv = dev_get_priv(dev);
+       struct ofnode_phandle_args l2c_node;
+       struct ddr_conf *ddr_conf_ptr;
+       u32 ddr_conf_valid_mask = 0;
+       u32 measured_ddr_hertz = 0;
+       int conf_table_count;
+       int def_ddr_freq;
+       u32 mem_mbytes = 0;
+       u32 ddr_hertz;
+       u32 ddr_ref_hertz;
+       int alt_refclk;
+       const char *eptr;
+       fdt_addr_t addr;
+       u64 *ptr;
+       u64 val;
+       int ret;
+       int i;
+
+       /* Don't try to re-init the DDR controller after relocation */
+       if (gd->flags & GD_FLG_RELOC)
+               return 0;
+
+       /*
+        * Dummy-read all local variables into the cache, so that they stay
+        * locked in cache when the DDR code later runs with flushes etc. enabled
+        */
+       ptr = (u64 *)_end;
+       for (i = 0; i < (0x100000 / sizeof(u64)); i++)
+               val = readq(ptr++);
+
+       /*
+        * The base addresses of LMC and L2C are read from the DT. This
+        * makes it possible to use the DDR init code without the need
+        * of the "node" variable, describing on which node to access. The
+        * node number is already included implicitly in the base addresses
+        * read from the DT this way.
+        */
+
+       /* Get LMC base address */
+       priv->lmc_base = dev_remap_addr(dev);
+       debug("%s: lmc_base=%p\n", __func__, priv->lmc_base);
+
+       /* Get L2C base address */
+       ret = dev_read_phandle_with_args(dev, "l2c-handle", NULL, 0, 0,
+                                        &l2c_node);
+       if (ret) {
+               printf("Can't access L2C node!\n");
+               return -ENODEV;
+       }
+
+       addr = ofnode_get_addr(l2c_node.node);
+       if (addr == FDT_ADDR_T_NONE) {
+               printf("Can't access L2C node!\n");
+               return -ENODEV;
+       }
+
+       priv->l2c_base = map_physmem(addr, 0, MAP_NOCACHE);
+       debug("%s: l2c_base=%p\n", __func__, priv->l2c_base);
+
+       ddr_conf_ptr = octeon_ddr_conf_table_get(&conf_table_count,
+                                                &def_ddr_freq);
+       if (!ddr_conf_ptr) {
+               printf("ERROR: unable to determine DDR configuration\n");
+               return -ENODEV;
+       }
+
+       for (i = 0; i < conf_table_count; i++) {
+               if (ddr_conf_ptr[i].dimm_config_table[0].spd_addrs[0] ||
+                   ddr_conf_ptr[i].dimm_config_table[0].spd_ptrs[0])
+                       ddr_conf_valid_mask |= 1 << i;
+       }
+
+       /*
+        * Check for special case of mismarked 3005 samples,
+        * and adjust cpuid
+        */
+       alt_refclk = 0;
+       ddr_hertz = def_ddr_freq * 1000000;
+
+       eptr = env_get("ddr_clock_hertz");
+       if (eptr) {
+               ddr_hertz = simple_strtoul(eptr, NULL, 0);
+               gd->mem_clk = divide_nint(ddr_hertz, 1000000);
+               printf("Parameter found in environment. ddr_clock_hertz = %d\n",
+                      ddr_hertz);
+       }
+
+       ddr_ref_hertz = octeon3_refclock(alt_refclk,
+                                        ddr_hertz,
+                                        &ddr_conf_ptr[0].dimm_config_table[0]);
+
+       debug("Initializing DDR, clock = %uhz, reference = %uhz\n",
+             ddr_hertz, ddr_ref_hertz);
+
+       mem_mbytes = octeon_ddr_initialize(priv, gd->cpu_clk,
+                                          ddr_hertz, ddr_ref_hertz,
+                                          ddr_conf_valid_mask,
+                                          ddr_conf_ptr, &measured_ddr_hertz);
+       debug("Mem size in MBYTES: %u\n", mem_mbytes);
+
+       gd->mem_clk = divide_nint(measured_ddr_hertz, 1000000);
+
+       debug("Measured DDR clock %d Hz\n", measured_ddr_hertz);
+
+       if (measured_ddr_hertz != 0) {
+               if (!gd->mem_clk) {
+                       /*
+                        * If ddr_clock not set, use measured clock
+                        * and don't warn
+                        */
+                       gd->mem_clk = divide_nint(measured_ddr_hertz, 1000000);
+               } else if ((measured_ddr_hertz > ddr_hertz + 3000000) ||
+                          (measured_ddr_hertz < ddr_hertz - 3000000)) {
+                       printf("\nWARNING:\n");
+                       printf("WARNING: Measured DDR clock mismatch!  expected: %lld MHz, measured: %lld MHz, cpu clock: %lu MHz\n",
+                              divide_nint(ddr_hertz, 1000000),
+                              divide_nint(measured_ddr_hertz, 1000000),
+                              gd->cpu_clk);
+                       printf("WARNING:\n\n");
+                       gd->mem_clk = divide_nint(measured_ddr_hertz, 1000000);
+               }
+       }
+
+       if (!mem_mbytes)
+               return -ENODEV;
+
+       priv->info.base = CONFIG_SYS_SDRAM_BASE;
+       priv->info.size = MB(mem_mbytes);
+
+       /*
+        * For 6XXX generate a proper error when reading/writing
+        * non-existent memory locations.
+        */
+       cvmx_l2c_set_big_size(priv, mem_mbytes, 0);
+
+       debug("Ram size %uMiB\n", mem_mbytes);
+
+       return 0;
+}
+
+static int octeon_get_info(struct udevice *dev, struct ram_info *info)
+{
+       struct ddr_priv *priv = dev_get_priv(dev);
+
+       *info = priv->info;
+
+       return 0;
+}
+
+static struct ram_ops octeon_ops = {
+       .get_info = octeon_get_info,
+};
+
+static const struct udevice_id octeon_ids[] = {
+       {.compatible = "cavium,octeon-7xxx-ddr4" },
+       { }
+};
+
+U_BOOT_DRIVER(octeon_ddr) = {
+       .name = "octeon_ddr",
+       .id = UCLASS_RAM,
+       .of_match = octeon_ids,
+       .ops = &octeon_ops,
+       .probe = octeon_ddr_probe,
+       .platdata_auto_alloc_size = sizeof(struct ddr_priv),
+};
index 4eb7b34..0971a7c 100644 (file)
@@ -46,6 +46,15 @@ config USB_XHCI_MVEBU
          SoCs, which includes Armada8K, Armada3700 and other Armada
          family SoCs.
 
+config USB_XHCI_OCTEON
+       bool "Support for Marvell Octeon family on-chip xHCI USB controller"
+       depends on ARCH_OCTEON
+       default y
+       help
+         Enables support for the on-chip xHCI controller on Marvell Octeon
+         family SoCs. This is a glue driver for the DWC3 core, providing the
+         logic needed to configure the controller.
+
 config USB_XHCI_PCI
        bool "Support for PCI-based xHCI USB controller"
        depends on DM_USB
index 29d4f87..a12e8f2 100644 (file)
@@ -56,6 +56,7 @@ obj-$(CONFIG_USB_XHCI_OMAP) += xhci-omap.o
 obj-$(CONFIG_USB_XHCI_PCI) += xhci-pci.o
 obj-$(CONFIG_USB_XHCI_RCAR) += xhci-rcar.o
 obj-$(CONFIG_USB_XHCI_STI) += dwc3-sti-glue.o
+obj-$(CONFIG_USB_XHCI_OCTEON) += dwc3-octeon-glue.o
 
 # designware
 obj-$(CONFIG_USB_DWC2) += dwc2.o
diff --git a/drivers/usb/host/dwc3-octeon-glue.c b/drivers/usb/host/dwc3-octeon-glue.c
new file mode 100644 (file)
index 0000000..39b3185
--- /dev/null
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Octeon family DWC3 specific glue layer
+ *
+ * Copyright (C) 2020 Stefan Roese <sr@denx.de>
+ *
+ * The low-level init code is based on the Linux driver octeon-usb.c by
+ * David Daney <david.daney@cavium.com>, which is:
+ * Copyright (C) 2010-2017 Cavium Networks
+ */
+
+#include <dm.h>
+#include <errno.h>
+#include <usb.h>
+#include <asm/io.h>
+#include <dm/lists.h>
+#include <dm/of_access.h>
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/usb/dwc3.h>
+#include <linux/usb/otg.h>
+#include <mach/octeon-model.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+#define CVMX_GPIO_BIT_CFGX(i)  (0x0001070000000900ull + ((i) * 8))
+#define CVMX_GPIO_XBIT_CFGX(i) (0x0001070000000900ull + \
+                                ((i) & 31) * 8 - 8 * 16)
+
+#define GPIO_BIT_CFG_TX_OE             BIT_ULL(0)
+#define GPIO_BIT_CFG_OUTPUT_SEL                GENMASK_ULL(20, 16)
+
+#define UCTL_CTL_UCTL_RST              BIT_ULL(0)
+#define UCTL_CTL_UAHC_RST              BIT_ULL(1)
+#define UCTL_CTL_UPHY_RST              BIT_ULL(2)
+#define UCTL_CTL_DRD_MODE              BIT_ULL(3)
+#define UCTL_CTL_SCLK_EN               BIT_ULL(4)
+#define UCTL_CTL_HS_POWER_EN           BIT_ULL(12)
+#define UCTL_CTL_SS_POWER_EN           BIT_ULL(14)
+#define UCTL_CTL_H_CLKDIV_SEL          GENMASK_ULL(26, 24)
+#define UCTL_CTL_H_CLKDIV_RST          BIT_ULL(28)
+#define UCTL_CTL_H_CLK_EN              BIT_ULL(30)
+#define UCTL_CTL_REF_CLK_FSEL          GENMASK_ULL(37, 32)
+#define UCTL_CTL_REF_CLK_DIV2          BIT_ULL(38)
+#define UCTL_CTL_REF_SSP_EN            BIT_ULL(39)
+#define UCTL_CTL_MPLL_MULTIPLIER       GENMASK_ULL(46, 40)
+#define UCTL_CTL_SSC_EN                        BIT_ULL(59)
+#define UCTL_CTL_REF_CLK_SEL           GENMASK_ULL(61, 60)
+
+#define UCTL_HOST_CFG                  0xe0
+#define UCTL_HOST_CFG_PPC_ACTIVE_HIGH_EN BIT_ULL(24)
+#define UCTL_HOST_CFG_PPC_EN           BIT_ULL(25)
+
+#define UCTL_SHIM_CFG                  0xe8
+#define UCTL_SHIM_CFG_CSR_ENDIAN_MODE  GENMASK_ULL(1, 0)
+#define UCTL_SHIM_CFG_DMA_ENDIAN_MODE  GENMASK_ULL(9, 8)
+
+#define OCTEON_H_CLKDIV_SEL            8
+#define OCTEON_MIN_H_CLK_RATE          150000000
+#define OCTEON_MAX_H_CLK_RATE          300000000
+
+#define CLOCK_50MHZ                    50000000
+#define CLOCK_100MHZ                   100000000
+#define CLOCK_125MHZ                   125000000
+
+static u8 clk_div[OCTEON_H_CLKDIV_SEL] = {1, 2, 4, 6, 8, 16, 24, 32};
+
+static int dwc3_octeon_config_power(struct udevice *dev, void __iomem *base)
+{
+       u64 uctl_host_cfg;
+       u64 gpio_bit;
+       u32 gpio_pwr[3];
+       int gpio, len, power_active_low;
+       const struct device_node *node = dev_np(dev);
+       int index = ((u64)base >> 24) & 1;
+       void __iomem *gpio_bit_cfg;
+
+       if (of_find_property(node, "power", &len)) {
+               if (len == 12) {
+                       dev_read_u32_array(dev, "power", gpio_pwr, 3);
+                       power_active_low = gpio_pwr[2] & 0x01;
+                       gpio = gpio_pwr[1];
+               } else if (len == 8) {
+                       dev_read_u32_array(dev, "power", gpio_pwr, 2);
+                       power_active_low = 0;
+                       gpio = gpio_pwr[1];
+               } else {
+                       printf("Invalid \"power\" property length\n");
+                       return -EINVAL;
+               }
+
+               gpio_bit_cfg = ioremap(CVMX_GPIO_BIT_CFGX(gpio), 0);
+
+               if ((OCTEON_IS_MODEL(OCTEON_CN73XX) ||
+                    OCTEON_IS_MODEL(OCTEON_CNF75XX)) && gpio <= 31) {
+                       gpio_bit = ioread64(gpio_bit_cfg);
+                       gpio_bit |= GPIO_BIT_CFG_TX_OE;
+                       gpio_bit &= ~GPIO_BIT_CFG_OUTPUT_SEL;
+                       gpio_bit |= FIELD_PREP(GPIO_BIT_CFG_OUTPUT_SEL,
+                                              index == 0 ? 0x14 : 0x15);
+                       iowrite64(gpio_bit, gpio_bit_cfg);
+               } else if (gpio <= 15) {
+                       gpio_bit = ioread64(gpio_bit_cfg);
+                       gpio_bit |= GPIO_BIT_CFG_TX_OE;
+                       gpio_bit &= ~GPIO_BIT_CFG_OUTPUT_SEL;
+                       gpio_bit |= FIELD_PREP(GPIO_BIT_CFG_OUTPUT_SEL,
+                                              index == 0 ? 0x14 : 0x19);
+                       iowrite64(gpio_bit, gpio_bit_cfg);
+               } else {
+                       gpio_bit_cfg = ioremap(CVMX_GPIO_XBIT_CFGX(gpio), 0);
+
+                       gpio_bit = ioread64(gpio_bit_cfg);
+                       gpio_bit |= GPIO_BIT_CFG_TX_OE;
+                       gpio_bit &= ~GPIO_BIT_CFG_OUTPUT_SEL;
+                       gpio_bit |= FIELD_PREP(GPIO_BIT_CFG_OUTPUT_SEL,
+                                              index == 0 ? 0x14 : 0x19);
+                       iowrite64(gpio_bit, gpio_bit_cfg);
+               }
+
+               /* Enable XHCI power control and set if active high or low. */
+               uctl_host_cfg = ioread64(base + UCTL_HOST_CFG);
+               uctl_host_cfg |= UCTL_HOST_CFG_PPC_EN;
+               if (power_active_low)
+                       uctl_host_cfg &= ~UCTL_HOST_CFG_PPC_ACTIVE_HIGH_EN;
+               else
+                       uctl_host_cfg |= UCTL_HOST_CFG_PPC_ACTIVE_HIGH_EN;
+               iowrite64(uctl_host_cfg, base + UCTL_HOST_CFG);
+
+               /* Wait for power to stabilize */
+               mdelay(10);
+       } else {
+               /* Disable XHCI power control and set if active high. */
+               uctl_host_cfg = ioread64(base + UCTL_HOST_CFG);
+               uctl_host_cfg &= ~UCTL_HOST_CFG_PPC_EN;
+               uctl_host_cfg &= ~UCTL_HOST_CFG_PPC_ACTIVE_HIGH_EN;
+               iowrite64(uctl_host_cfg, base + UCTL_HOST_CFG);
+               dev_warn(dev, "No power GPIO configured, port power control disabled\n");
+       }
+
+       return 0;
+}
+
+static int dwc3_octeon_clocks_start(struct udevice *dev, void __iomem *base)
+{
+       u64 uctl_ctl;
+       int ref_clk_sel = 2;
+       int div;
+       u32 clock_rate;
+       int mpll_mul;
+       int i;
+       u64 h_clk_rate;
+       void __iomem *uctl_ctl_reg = base;
+       const char *ss_clock_type;
+       const char *hs_clock_type;
+
+       i = dev_read_u32(dev, "refclk-frequency", &clock_rate);
+       if (i) {
+               printf("No UCTL \"refclk-frequency\"\n");
+               return -EINVAL;
+       }
+
+       ss_clock_type = dev_read_string(dev, "refclk-type-ss");
+       if (!ss_clock_type) {
+               printf("No UCTL \"refclk-type-ss\"\n");
+               return -EINVAL;
+       }
+
+       hs_clock_type = dev_read_string(dev, "refclk-type-hs");
+       if (!hs_clock_type) {
+               printf("No UCTL \"refclk-type-hs\"\n");
+               return -EINVAL;
+       }
+
+       if (strcmp("dlmc_ref_clk0", ss_clock_type) == 0) {
+               if (strcmp(hs_clock_type, "dlmc_ref_clk0") == 0) {
+                       ref_clk_sel = 0;
+               } else if (strcmp(hs_clock_type, "pll_ref_clk") == 0) {
+                       ref_clk_sel = 2;
+               } else {
+                       printf("Invalid HS clock type %s, using pll_ref_clk\n",
+                              hs_clock_type);
+               }
+       } else if (strcmp(ss_clock_type, "dlmc_ref_clk1") == 0) {
+               if (strcmp(hs_clock_type, "dlmc_ref_clk1") == 0) {
+                       ref_clk_sel = 1;
+               } else if (strcmp(hs_clock_type, "pll_ref_clk") == 0) {
+                       ref_clk_sel = 3;
+               } else {
+                       printf("Invalid HS clock type %s, using pll_ref_clk\n",
+                              hs_clock_type);
+                       ref_clk_sel = 3;
+               }
+       } else {
+               printf("Invalid SS clock type %s, using dlmc_ref_clk0\n",
+                      ss_clock_type);
+       }
+
+       if ((ref_clk_sel == 0 || ref_clk_sel == 1) &&
+           clock_rate != CLOCK_100MHZ)
+               printf("Invalid UCTL clock rate of %u\n", clock_rate);
+
+       /*
+        * Step 1: Wait for all voltages to be stable...that surely
+        *         happened before this driver is started. SKIP
+        */
+
+       /* Step 2: Select GPIO for overcurrent indication, if desired. SKIP */
+
+       /* Step 3: Assert all resets. */
+       uctl_ctl = ioread64(uctl_ctl_reg);
+       uctl_ctl |= UCTL_CTL_UCTL_RST | UCTL_CTL_UAHC_RST | UCTL_CTL_UPHY_RST;
+       iowrite64(uctl_ctl, uctl_ctl_reg);
+
+       /* Step 4a: Reset the clock dividers. */
+       uctl_ctl = ioread64(uctl_ctl_reg);
+       uctl_ctl |= UCTL_CTL_H_CLKDIV_RST;
+       iowrite64(uctl_ctl, uctl_ctl_reg);
+
+       /* Step 4b: Select controller clock frequency. */
+       for (div = ARRAY_SIZE(clk_div) - 1; div >= 0; div--) {
+               h_clk_rate = gd->bus_clk / clk_div[div];
+               if (h_clk_rate <= OCTEON_MAX_H_CLK_RATE &&
+                   h_clk_rate >= OCTEON_MIN_H_CLK_RATE)
+                       break;
+       }
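+       /*
+        * Example (a sketch): with an 800 MHz coprocessor clock in
+        * gd->bus_clk the loop above settles on clk_div[2] = 4, giving a
+        * 200 MHz hclk inside the 150..300 MHz window.
+        */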
+       uctl_ctl = ioread64(uctl_ctl_reg);
+       uctl_ctl &= ~UCTL_CTL_H_CLKDIV_SEL;
+       uctl_ctl |= FIELD_PREP(UCTL_CTL_H_CLKDIV_SEL, div);
+       uctl_ctl |= UCTL_CTL_H_CLK_EN;
+       iowrite64(uctl_ctl, uctl_ctl_reg);
+       uctl_ctl = ioread64(uctl_ctl_reg);
+       if (div != FIELD_GET(UCTL_CTL_H_CLKDIV_SEL, uctl_ctl) ||
+           !(uctl_ctl & UCTL_CTL_H_CLK_EN)) {
+               printf("dwc3 controller clock init failure\n");
+               return -EINVAL;
+       }
+
+       /* Step 4c: Deassert the controller clock divider reset. */
+       uctl_ctl = ioread64(uctl_ctl_reg);
+       uctl_ctl &= ~UCTL_CTL_H_CLKDIV_RST;
+       iowrite64(uctl_ctl, uctl_ctl_reg);
+
+       /* Step 5a: Reference clock configuration. */
+       uctl_ctl = ioread64(uctl_ctl_reg);
+       uctl_ctl &= ~UCTL_CTL_REF_CLK_SEL;
+       uctl_ctl |= FIELD_PREP(UCTL_CTL_REF_CLK_SEL, ref_clk_sel);
+       uctl_ctl &= ~UCTL_CTL_REF_CLK_FSEL;
+       uctl_ctl |= FIELD_PREP(UCTL_CTL_REF_CLK_FSEL, 0x07);
+       uctl_ctl &= ~UCTL_CTL_REF_CLK_DIV2;
+
+       switch (clock_rate) {
+       default:
+               printf("Invalid ref_clk %u, using %u instead\n", clock_rate,
+                      CLOCK_100MHZ);
+               fallthrough;
+       case CLOCK_100MHZ:
+               mpll_mul = 0x19;
+               if (ref_clk_sel < 2) {
+                       uctl_ctl &= ~UCTL_CTL_REF_CLK_FSEL;
+                       uctl_ctl |= FIELD_PREP(UCTL_CTL_REF_CLK_FSEL, 0x27);
+               }
+               break;
+       case CLOCK_50MHZ:
+               mpll_mul = 0x32;
+               break;
+       case CLOCK_125MHZ:
+               mpll_mul = 0x28;
+               break;
+       }
+       uctl_ctl &= ~UCTL_CTL_MPLL_MULTIPLIER;
+       uctl_ctl |= FIELD_PREP(UCTL_CTL_MPLL_MULTIPLIER, mpll_mul);
+
+       /* Step 5b: Configure and enable spread-spectrum for SuperSpeed. */
+       uctl_ctl |= UCTL_CTL_SSC_EN;
+
+       /* Step 5c: Enable SuperSpeed. */
+       uctl_ctl |= UCTL_CTL_REF_SSP_EN;
+
+       /* Step 5d: Configure PHYs. SKIP */
+
+       /* Step 6a & 6b: Power up PHYs. */
+       uctl_ctl |= UCTL_CTL_HS_POWER_EN;
+       uctl_ctl |= UCTL_CTL_SS_POWER_EN;
+       iowrite64(uctl_ctl, uctl_ctl_reg);
+
+       /* Step 7: Wait 10 controller-clock cycles to take effect. */
+       udelay(10);
+
+       /* Step 8a: Deassert UCTL reset signal. */
+       uctl_ctl = ioread64(uctl_ctl_reg);
+       uctl_ctl &= ~UCTL_CTL_UCTL_RST;
+       iowrite64(uctl_ctl, uctl_ctl_reg);
+
+       /* Step 8b: Wait 10 controller-clock cycles. */
+       udelay(10);
+
+       /* Step 8c: Set up port power control. */
+       if (dwc3_octeon_config_power(dev, base)) {
+               printf("Error configuring power\n");
+               return -EINVAL;
+       }
+
+       /* Step 8d: Deassert UAHC reset signal. */
+       uctl_ctl = ioread64(uctl_ctl_reg);
+       uctl_ctl &= ~UCTL_CTL_UAHC_RST;
+       iowrite64(uctl_ctl, uctl_ctl_reg);
+
+       /* Step 8e: Wait 10 controller-clock cycles. */
+       udelay(10);
+
+       /* Step 9: Enable conditional coprocessor clock of UCTL. */
+       uctl_ctl = ioread64(uctl_ctl_reg);
+       uctl_ctl |= UCTL_CTL_SCLK_EN;
+       iowrite64(uctl_ctl, uctl_ctl_reg);
+
+       /* Step 10: Set for host mode only. */
+       uctl_ctl = ioread64(uctl_ctl_reg);
+       uctl_ctl &= ~UCTL_CTL_DRD_MODE;
+       iowrite64(uctl_ctl, uctl_ctl_reg);
+
+       return 0;
+}
+
+static void dwc3_octeon_set_endian_mode(void __iomem *base)
+{
+       u64 shim_cfg;
+
+       shim_cfg = ioread64(base + UCTL_SHIM_CFG);
+       shim_cfg &= ~UCTL_SHIM_CFG_CSR_ENDIAN_MODE;
+       shim_cfg |= FIELD_PREP(UCTL_SHIM_CFG_CSR_ENDIAN_MODE, 1);
+       shim_cfg &= ~UCTL_SHIM_CFG_DMA_ENDIAN_MODE;
+       shim_cfg |= FIELD_PREP(UCTL_SHIM_CFG_DMA_ENDIAN_MODE, 1);
+       iowrite64(shim_cfg, base + UCTL_SHIM_CFG);
+}
+
+static void dwc3_octeon_phy_reset(void __iomem *base)
+{
+       u64 uctl_ctl;
+
+       uctl_ctl = ioread64(base);
+       uctl_ctl &= ~UCTL_CTL_UPHY_RST;
+       iowrite64(uctl_ctl, base);
+}
+
+static int octeon_dwc3_glue_probe(struct udevice *dev)
+{
+       void __iomem *base;
+
+       base = dev_remap_addr(dev);
+       if (IS_ERR(base))
+               return PTR_ERR(base);
+
+       dwc3_octeon_clocks_start(dev, base);
+       dwc3_octeon_set_endian_mode(base);
+       dwc3_octeon_phy_reset(base);
+
+       return 0;
+}
+
+static int octeon_dwc3_glue_bind(struct udevice *dev)
+{
+       ofnode node, dwc3_node;
+
+       /* Find the snps,dwc3 node among the subnodes */
+       dwc3_node = ofnode_null();
+       ofnode_for_each_subnode(node, dev->node) {
+               if (ofnode_device_is_compatible(node, "snps,dwc3"))
+                       dwc3_node = node;
+       }
+
+       if (!ofnode_valid(dwc3_node)) {
+               printf("Can't find dwc3 subnode for %s\n", dev->name);
+               return -ENODEV;
+       }
+
+       return dm_scan_fdt_dev(dev);
+}
+
+static const struct udevice_id octeon_dwc3_glue_ids[] = {
+       { .compatible = "cavium,octeon-7130-usb-uctl" },
+       { }
+};
+
+U_BOOT_DRIVER(dwc3_octeon_glue) = {
+       .name = "dwc3_octeon_glue",
+       .id = UCLASS_NOP,
+       .of_match = octeon_dwc3_glue_ids,
+       .probe = octeon_dwc3_glue_probe,
+       .bind = octeon_dwc3_glue_bind,
+       .flags = DM_FLAG_ALLOC_PRIV_DMA,
+};
index 27f8410..045de2f 100644 (file)
@@ -122,7 +122,7 @@ static int xhci_dwc3_probe(struct udevice *dev)
        u32 reg;
        int ret;
 
-       hccr = (struct xhci_hccr *)((uintptr_t)dev_read_addr(dev));
+       hccr = (struct xhci_hccr *)((uintptr_t)dev_remap_addr(dev));
        hcor = (struct xhci_hcor *)((uintptr_t)hccr +
                        HC_LENGTH(xhci_readl(&(hccr)->cr_capbase)));
 
index b118207..13065d7 100644 (file)
@@ -722,8 +722,6 @@ int xhci_bulk_tx(struct usb_device *udev, unsigned long pipe,
 
        BUG_ON(TRB_TO_SLOT_ID(field) != slot_id);
        BUG_ON(TRB_TO_EP_INDEX(field) != ep_index);
-       BUG_ON(*(void **)(uintptr_t)le64_to_cpu(event->trans_event.buffer) -
-               buffer > (size_t)length);
 
        record_transfer_result(udev, event, length);
        xhci_acknowledge_event(ctrl);
index 530f02a..109ef40 100644 (file)
@@ -7,13 +7,20 @@
 #ifndef __OCTEON_COMMON_H__
 #define __OCTEON_COMMON_H__
 
-/* No DDR init yet -> run in L2 cache with limited resources */
+#if defined(CONFIG_RAM_OCTEON)
+#define CONFIG_SYS_MALLOC_LEN          (16 << 20)
+#define CONFIG_SYS_INIT_SP_OFFSET      0x20100000
+#else
+/* No DDR init -> run in L2 cache with limited resources */
 #define CONFIG_SYS_MALLOC_LEN          (256 << 10)
+#define CONFIG_SYS_INIT_SP_OFFSET      0x00180000
+#endif
+
 #define CONFIG_SYS_SDRAM_BASE          0xffffffff80000000
 #define CONFIG_SYS_MONITOR_BASE                CONFIG_SYS_TEXT_BASE
 
 #define CONFIG_SYS_LOAD_ADDR           (CONFIG_SYS_SDRAM_BASE + (1 << 20))
 
-#define CONFIG_SYS_INIT_SP_OFFSET      0x180000
+#define CONFIG_SYS_BOOTM_LEN           (64 << 20)      /* 64M */
 
 #endif /* __OCTEON_COMMON_H__ */