Prepare v2023.10
[platform/kernel/u-boot.git] / tools / kwbimage.c
index da8bfe0..4dce495 100644 (file)
@@ -6,6 +6,8 @@
  *
  * (C) Copyright 2013 Thomas Petazzoni
  * <thomas.petazzoni@free-electrons.com>
+ *
+ * (C) Copyright 2022 Pali Rohár <pali@kernel.org>
  */
 
 #define OPENSSL_API_COMPAT 0x10101000L
@@ -43,6 +45,21 @@ void EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx)
 }
 #endif
 
+/* fls4 - 1-based index of the highest set bit in the low 4 bits, 0 if none */
+static inline int fls4(int num)
+{
+       if (num & 0x8)
+               return 4;
+       else if (num & 0x4)
+               return 3;
+       else if (num & 0x2)
+               return 2;
+       else if (num & 0x1)
+               return 1;
+       else
+               return 0; /* none of bits 0-3 is set; higher bits are never examined */
+}
+
 static struct image_cfg_element *image_cfg;
 static int cfgn;
 static int verbose_mode;
@@ -99,6 +116,8 @@ enum image_cfg_type {
        IMAGE_CFG_NAND_BADBLK_LOCATION,
        IMAGE_CFG_NAND_ECC_MODE,
        IMAGE_CFG_NAND_PAGESZ,
+       IMAGE_CFG_SATA_BLKSZ,
+       IMAGE_CFG_CPU,
        IMAGE_CFG_BINARY,
        IMAGE_CFG_DATA,
        IMAGE_CFG_DATA_DELAY,
@@ -129,6 +148,8 @@ static const char * const id_strs[] = {
        [IMAGE_CFG_NAND_BADBLK_LOCATION] = "NAND_BADBLK_LOCATION",
        [IMAGE_CFG_NAND_ECC_MODE] = "NAND_ECC_MODE",
        [IMAGE_CFG_NAND_PAGESZ] = "NAND_PAGE_SIZE",
+       [IMAGE_CFG_SATA_BLKSZ] = "SATA_BLKSZ",
+       [IMAGE_CFG_CPU] = "CPU",
        [IMAGE_CFG_BINARY] = "BINARY",
        [IMAGE_CFG_DATA] = "DATA",
        [IMAGE_CFG_DATA_DELAY] = "DATA_DELAY",
@@ -152,9 +173,11 @@ struct image_cfg_element {
        enum image_cfg_type type;
        union {
                unsigned int version;
+               unsigned int cpu_sheeva;
                unsigned int bootfrom;
                struct {
                        const char *file;
+                       unsigned int loadaddr;
                        unsigned int args[BINARY_MAX_ARGS];
                        unsigned int nargs;
                } binary;
@@ -164,6 +187,7 @@ struct image_cfg_element {
                unsigned int nandbadblklocation;
                unsigned int nandeccmode;
                unsigned int nandpagesz;
+               unsigned int satablksz;
                struct ext_hdr_v0_reg regdata;
                unsigned int regdata_delay;
                unsigned int baudrate;
@@ -199,7 +223,7 @@ static const char *image_boot_mode_name(unsigned int id)
        return NULL;
 }
 
-int image_boot_mode_id(const char *boot_mode_name)
+static int image_boot_mode_id(const char *boot_mode_name)
 {
        int i;
 
@@ -210,7 +234,18 @@ int image_boot_mode_id(const char *boot_mode_name)
        return -1;
 }
 
-int image_nand_ecc_mode_id(const char *nand_ecc_mode_name)
+static const char *image_nand_ecc_mode_name(unsigned int id)
+{ /* Reverse lookup: NAND ECC mode id -> name string from nand_ecc_modes[]. */
+       int i;
+
+       for (i = 0; nand_ecc_modes[i].name; i++) /* stop at the NULL-name entry */
+               if (nand_ecc_modes[i].id == id)
+                       return nand_ecc_modes[i].name;
+
+       return NULL; /* id matched no entry */
+}
+
+static int image_nand_ecc_mode_id(const char *nand_ecc_mode_name)
 {
        int i;
 
@@ -280,6 +315,17 @@ static int image_get_bootfrom(void)
        return e->bootfrom;
 }
 
+static int image_is_cpu_sheeva(void)
+{ /* Returns the cpu_sheeva value of the CPU config element, or 0 without one. */
+       struct image_cfg_element *e;
+
+       e = image_find_option(IMAGE_CFG_CPU);
+       if (!e)
+               return 0; /* no CPU element found: report non-Sheeva */
+
+       return e->cpu_sheeva; /* the config parser stores 1 only for "SHEEVA" */
+}
+
 /*
  * Compute a 8-bit checksum of a memory area. This algorithm follows
  * the requirements of the Marvell SoC BootROM specifications.
@@ -344,6 +390,29 @@ static uint32_t image_checksum32(void *start, uint32_t len)
        return csum;
 }
 
+static unsigned int options_to_baudrate(uint8_t options)
+{ /* Decode the baudrate selector held in an options byte into a numeric rate. */
+       switch (options & 0x7) { /* only the low 3 bits carry the selector */
+       case MAIN_HDR_V1_OPT_BAUD_2400:
+               return 2400;
+       case MAIN_HDR_V1_OPT_BAUD_4800:
+               return 4800;
+       case MAIN_HDR_V1_OPT_BAUD_9600:
+               return 9600;
+       case MAIN_HDR_V1_OPT_BAUD_19200:
+               return 19200;
+       case MAIN_HDR_V1_OPT_BAUD_38400:
+               return 38400;
+       case MAIN_HDR_V1_OPT_BAUD_57600:
+               return 57600;
+       case MAIN_HDR_V1_OPT_BAUD_115200:
+               return 115200;
+       case MAIN_HDR_V1_OPT_BAUD_DEFAULT:
+       default:
+               return 0; /* default selector (or unknown value): no explicit rate */
+       }
+}
+
 static uint8_t baudrate_to_option(unsigned int baudrate)
 {
        switch (baudrate) {
@@ -602,7 +671,8 @@ static int kwb_export_pubkey(RSA *key, struct pubkey_der_v1 *dst, FILE *hashf,
        return 0;
 }
 
-int kwb_sign(RSA *key, void *data, int datasz, struct sig_v1 *sig, char *signame)
+static int kwb_sign(RSA *key, void *data, int datasz, struct sig_v1 *sig,
+                   char *signame)
 {
        EVP_PKEY *evp_key;
        EVP_MD_CTX *ctx;
@@ -662,8 +732,8 @@ err_key:
        return ret;
 }
 
-int kwb_verify(RSA *key, void *data, int datasz, struct sig_v1 *sig,
-              char *signame)
+static int kwb_verify(RSA *key, void *data, int datasz, struct sig_v1 *sig,
+                     char *signame)
 {
        EVP_PKEY *evp_key;
        EVP_MD_CTX *ctx;
@@ -722,8 +792,8 @@ err_key:
        return ret;
 }
 
-int kwb_sign_and_verify(RSA *key, void *data, int datasz, struct sig_v1 *sig,
-                       char *signame)
+static int kwb_sign_and_verify(RSA *key, void *data, int datasz,
+                              struct sig_v1 *sig, char *signame)
 {
        if (kwb_sign(key, data, datasz, sig, signame) < 0)
                return -1;
@@ -735,7 +805,7 @@ int kwb_sign_and_verify(RSA *key, void *data, int datasz, struct sig_v1 *sig,
 }
 
 
-int kwb_dump_fuse_cmds_38x(FILE *out, struct secure_hdr_v1 *sec_hdr)
+static int kwb_dump_fuse_cmds_38x(FILE *out, struct secure_hdr_v1 *sec_hdr)
 {
        struct hash_v1 kak_pub_hash;
        struct image_cfg_element *e;
@@ -860,13 +930,86 @@ done:
        return ret;
 }
 
+static int image_fill_xip_header(void *image, struct image_tool_params *params)
+{ /* Rewrite destaddr/execaddr/srcaddr of the main header for XIP; returns 1 on success, 0 on error. */
+       struct main_hdr_v1 *main_hdr = image; /* kwbimage v0 and v1 have same XIP members */
+       int version = kwbimage_version(image);
+       uint32_t srcaddr = le32_to_cpu(main_hdr->srcaddr);
+       uint32_t startaddr = 0;
+
+       if (main_hdr->blockid != IBR_HDR_SPI_ID) {
+               fprintf(stderr, "XIP is supported only for SPI images\n");
+               return 0;
+       }
+
+       if (version == 0 && /* NOTE(review): '<' upper bounds below exclude each window's last address - confirm intended */
+                  params->addr >= 0xE8000000 && params->addr < 0xEFFFFFFF &&
+                  params->ep >= 0xE8000000 && params->ep < 0xEFFFFFFF) {
+               /* Load and Execute address is in SPI address space (kwbimage v0) */
+               startaddr = 0xE8000000;
+       } else if (version != 0 &&
+                  params->addr >= 0xD4000000 && params->addr < 0xD7FFFFFF &&
+                  params->ep >= 0xD4000000 && params->ep < 0xD7FFFFFF) {
+               /* Load and Execute address is in SPI address space (kwbimage v1) */
+               startaddr = 0xD4000000;
+       } else if (version != 0 &&
+                  params->addr >= 0xD8000000 && params->addr < 0xDFFFFFFF &&
+                  params->ep >= 0xD8000000 && params->ep < 0xDFFFFFFF) {
+               /* Load and Execute address is in Device bus space (kwbimage v1) */
+               startaddr = 0xD8000000;
+       } else if (params->addr != 0x0) {
+               /* Load address is non-zero */
+               if (version == 0)
+                       fprintf(stderr, "XIP Load Address or XIP Entry Point is not in SPI address space\n");
+               else
+                       fprintf(stderr, "XIP Load Address or XIP Entry Point is not in SPI nor in Device bus address space\n");
+               return 0;
+       }
+
+       /*
+        * For XIP destaddr must be set to 0xFFFFFFFF and
+        * execaddr relative to the start of XIP memory address space.
+        */
+       main_hdr->destaddr = cpu_to_le32(0xFFFFFFFF);
+
+       if (startaddr == 0) { /* reached only when params->addr is 0 (no window matched) */
+               /*
+                * mkimage's --load-address 0x0 means that binary is Position
+                * Independent and in this case mkimage's --entry-point address
+                * is relative offset from beginning of the data part of image.
+                */
+               main_hdr->execaddr = cpu_to_le32(srcaddr + params->ep);
+       } else {
+               /* The lowest possible load address is after the header at srcaddr. */
+               if (params->addr - startaddr < srcaddr) {
+                       fprintf(stderr,
+                               "Invalid XIP Load Address 0x%08x.\n"
+                               "The lowest address for this configuration is 0x%08x.\n",
+                               params->addr, (unsigned)(startaddr + srcaddr));
+                       return 0;
+               }
+               main_hdr->srcaddr = cpu_to_le32(params->addr - startaddr); /* offsets are relative to the window start */
+               main_hdr->execaddr = cpu_to_le32(params->ep - startaddr);
+       }
+
+       return 1; /* header fields updated */
+}
+
+static unsigned int image_get_satablksz(void)
+{ /* SATA block size taken from the SATA_BLKSZ config element. */
+       struct image_cfg_element *e;
+       e = image_find_option(IMAGE_CFG_SATA_BLKSZ); /* NOTE(review): parser's power-of-two check lets 0 through; callers divide by this value */
+       return e ? e->satablksz : 512; /* 512 when no SATA_BLKSZ element exists */
+}
+
 static size_t image_headersz_align(size_t headersz, uint8_t blockid)
 {
        /*
         * Header needs to be 4-byte aligned, which is already ensured by code
         * above. Moreover UART images must have header aligned to 128 bytes
         * (xmodem block size), NAND images to 256 bytes (ECC calculation),
-        * and SATA and SDIO images to 512 bytes (storage block size).
+        * SDIO images to 512 bytes (SDHC/SDXC fixed block size) and SATA
+        * images to specified storage block size (default 512 bytes).
         * Note that SPI images do not have to have header size aligned
         * to 256 bytes because it is possible to read from SPI storage from
         * any offset (read offset does not have to be aligned to block size).
@@ -875,8 +1018,10 @@ static size_t image_headersz_align(size_t headersz, uint8_t blockid)
                return ALIGN(headersz, 128);
        else if (blockid == IBR_HDR_NAND_ID)
                return ALIGN(headersz, 256);
-       else if (blockid == IBR_HDR_SATA_ID || blockid == IBR_HDR_SDIO_ID)
+       else if (blockid == IBR_HDR_SDIO_ID)
                return ALIGN(headersz, 512);
+       else if (blockid == IBR_HDR_SATA_ID)
+               return ALIGN(headersz, image_get_satablksz());
        else
                return headersz;
 }
@@ -892,10 +1037,10 @@ static size_t image_headersz_v0(int *hasext)
                        *hasext = 1;
        }
 
-       return image_headersz_align(headersz, image_get_bootfrom());
+       return headersz;
 }
 
-static void *image_create_v0(size_t *imagesz, struct image_tool_params *params,
+static void *image_create_v0(size_t *dataoff, struct image_tool_params *params,
                             int payloadsz)
 {
        struct image_cfg_element *e;
@@ -905,10 +1050,11 @@ static void *image_create_v0(size_t *imagesz, struct image_tool_params *params,
        int has_ext = 0;
 
        /*
-        * Calculate the size of the header and the size of the
+        * Calculate the size of the header and the offset of the
         * payload
         */
        headersz = image_headersz_v0(&has_ext);
+       *dataoff = image_headersz_align(headersz, image_get_bootfrom());
 
        image = malloc(headersz);
        if (!image) {
@@ -923,7 +1069,7 @@ static void *image_create_v0(size_t *imagesz, struct image_tool_params *params,
        /* Fill in the main header */
        main_hdr->blocksize =
                cpu_to_le32(payloadsz);
-       main_hdr->srcaddr   = cpu_to_le32(headersz);
+       main_hdr->srcaddr   = cpu_to_le32(*dataoff);
        main_hdr->ext       = has_ext;
        main_hdr->version   = 0;
        main_hdr->destaddr  = cpu_to_le32(params->addr);
@@ -933,34 +1079,34 @@ static void *image_create_v0(size_t *imagesz, struct image_tool_params *params,
        e = image_find_option(IMAGE_CFG_NAND_ECC_MODE);
        if (e)
                main_hdr->nandeccmode = e->nandeccmode;
+       e = image_find_option(IMAGE_CFG_NAND_BLKSZ);
+       if (e)
+               main_hdr->nandblocksize = e->nandblksz / (64 * 1024);
        e = image_find_option(IMAGE_CFG_NAND_PAGESZ);
        if (e)
                main_hdr->nandpagesize = cpu_to_le16(e->nandpagesz);
-       main_hdr->checksum = image_checksum8(image,
-                                            sizeof(struct main_hdr_v0));
-
-       /*
-        * For SATA srcaddr is specified in number of sectors starting from
-        * sector 0. The main header is stored at sector number 1.
-        * This expects the sector size to be 512 bytes.
-        * Header size is already aligned.
-        */
-       if (main_hdr->blockid == IBR_HDR_SATA_ID)
-               main_hdr->srcaddr = cpu_to_le32(headersz / 512 + 1);
+       e = image_find_option(IMAGE_CFG_NAND_BADBLK_LOCATION);
+       if (e)
+               main_hdr->nandbadblklocation = e->nandbadblklocation;
 
-       /*
-        * For SDIO srcaddr is specified in number of sectors starting from
-        * sector 0. The main header is stored at sector number 0.
-        * This expects sector size to be 512 bytes.
-        * Header size is already aligned.
-        */
-       if (main_hdr->blockid == IBR_HDR_SDIO_ID)
-               main_hdr->srcaddr = cpu_to_le32(headersz / 512);
+       /* For SATA srcaddr is specified in number of sectors. */
+       if (main_hdr->blockid == IBR_HDR_SATA_ID) {
+               params->bl_len = image_get_satablksz();
+               main_hdr->srcaddr = cpu_to_le32(le32_to_cpu(main_hdr->srcaddr) / params->bl_len);
+       }
 
        /* For PCIe srcaddr is not used and must be set to 0xFFFFFFFF. */
        if (main_hdr->blockid == IBR_HDR_PEX_ID)
                main_hdr->srcaddr = cpu_to_le32(0xFFFFFFFF);
 
+       if (params->xflag) {
+               if (!image_fill_xip_header(main_hdr, params)) {
+                       free(image);
+                       return NULL;
+               }
+               *dataoff = le32_to_cpu(main_hdr->srcaddr);
+       }
+
        /* Generate the ext header */
        if (has_ext) {
                struct ext_hdr_v0 *ext_hdr;
@@ -986,21 +1132,22 @@ static void *image_create_v0(size_t *imagesz, struct image_tool_params *params,
                                                    sizeof(struct ext_hdr_v0));
        }
 
-       *imagesz = headersz;
+       main_hdr->checksum = image_checksum8(image,
+                                            sizeof(struct main_hdr_v0));
+
        return image;
 }
 
 static size_t image_headersz_v1(int *hasext)
 {
-       struct image_cfg_element *binarye;
+       struct image_cfg_element *e;
        unsigned int count;
        size_t headersz;
+       int cpu_sheeva;
+       struct stat s;
        int cfgi;
+       int ret;
 
-       /*
-        * Calculate the size of the header and the size of the
-        * payload
-        */
        headersz = sizeof(struct main_hdr_v1);
 
        if (image_get_csk_index() >= 0) {
@@ -1009,19 +1156,25 @@ static size_t image_headersz_v1(int *hasext)
                        *hasext = 1;
        }
 
-       count = image_count_options(IMAGE_CFG_DATA);
-       if (count > 0)
-               headersz += sizeof(struct register_set_hdr_v1) + 8 * count + 4;
+       cpu_sheeva = image_is_cpu_sheeva();
 
+       count = 0;
        for (cfgi = 0; cfgi < cfgn; cfgi++) {
-               int ret;
-               struct stat s;
+               e = &image_cfg[cfgi];
+
+               if (e->type == IMAGE_CFG_DATA)
+                       count++;
 
-               binarye = &image_cfg[cfgi];
-               if (binarye->type != IMAGE_CFG_BINARY)
+               if (e->type == IMAGE_CFG_DATA_DELAY ||
+                   (e->type == IMAGE_CFG_BINARY && count > 0)) {
+                       headersz += sizeof(struct register_set_hdr_v1) + 8 * count + 4;
+                       count = 0;
+               }
+
+               if (e->type != IMAGE_CFG_BINARY)
                        continue;
 
-               ret = stat(binarye->binary.file, &s);
+               ret = stat(e->binary.file, &s);
                if (ret < 0) {
                        char cwd[PATH_MAX];
                        char *dir = cwd;
@@ -1035,31 +1188,85 @@ static size_t image_headersz_v1(int *hasext)
                        fprintf(stderr,
                                "Didn't find the file '%s' in '%s' which is mandatory to generate the image\n"
                                "This file generally contains the DDR3 training code, and should be extracted from an existing bootable\n"
-                               "image for your board. Use 'dumpimage -T kwbimage -p 0' to extract it from an existing image.\n",
-                               binarye->binary.file, dir);
+                               "image for your board. Use 'dumpimage -T kwbimage -p 1' to extract it from an existing image.\n",
+                               e->binary.file, dir);
                        return 0;
                }
 
                headersz += sizeof(struct opt_hdr_v1) + sizeof(uint32_t) +
-                       (binarye->binary.nargs) * sizeof(uint32_t);
-               headersz = ALIGN(headersz, 16);
+                       (e->binary.nargs) * sizeof(uint32_t);
+
+               if (e->binary.loadaddr) {
+                       /*
+                        * BootROM loads kwbimage header (in which the
+                        * executable code is also stored) to address
+                        * 0x40004000 or 0x40000000. Thus there is
+                        * restriction for the load address of the N-th
+                        * BINARY image.
+                        */
+                       unsigned int base_addr, low_addr, high_addr;
+
+                       base_addr = cpu_sheeva ? 0x40004000 : 0x40000000;
+                       low_addr = base_addr + headersz;
+                       high_addr = low_addr +
+                                   (BINARY_MAX_ARGS - e->binary.nargs) * sizeof(uint32_t);
+
+                       if (cpu_sheeva && e->binary.loadaddr % 16) {
+                               fprintf(stderr,
+                                       "Invalid LOAD_ADDRESS 0x%08x for BINARY %s with %d args.\n"
+                                       "Address for CPU SHEEVA must be 16-byte aligned.\n",
+                                       e->binary.loadaddr, e->binary.file, e->binary.nargs);
+                               return 0;
+                       }
+
+                       if (e->binary.loadaddr % 4 || e->binary.loadaddr < low_addr ||
+                           e->binary.loadaddr > high_addr) {
+                               fprintf(stderr,
+                                       "Invalid LOAD_ADDRESS 0x%08x for BINARY %s with %d args.\n"
+                                       "Address must be 4-byte aligned and in range 0x%08x-0x%08x.\n",
+                                       e->binary.loadaddr, e->binary.file,
+                                       e->binary.nargs, low_addr, high_addr);
+                               return 0;
+                       }
+                       headersz = e->binary.loadaddr - base_addr;
+               } else if (cpu_sheeva) {
+                       headersz = ALIGN(headersz, 16);
+               } else {
+                       headersz = ALIGN(headersz, 4);
+               }
+
                headersz += ALIGN(s.st_size, 4) + sizeof(uint32_t);
                if (hasext)
                        *hasext = 1;
        }
 
-       return image_headersz_align(headersz, image_get_bootfrom());
+       if (count > 0)
+               headersz += sizeof(struct register_set_hdr_v1) + 8 * count + 4;
+
+       /*
+        * For all images except UART, headersz stored in header itself should
+        * contain header size without padding. For UART image BootROM rounds
+        * down headersz to a multiple of 128 bytes. Therefore align UART headersz
+        * to a multiple of 128 bytes to ensure that remaining UART header bytes
+        * are not ignored by BootROM.
+        */
+       if (image_get_bootfrom() == IBR_HDR_UART_ID)
+               headersz = ALIGN(headersz, 128);
+
+       return headersz;
 }
 
-int add_binary_header_v1(uint8_t **cur, uint8_t **next_ext,
-                        struct image_cfg_element *binarye,
-                        struct main_hdr_v1 *main_hdr)
+static int add_binary_header_v1(uint8_t **cur, uint8_t **next_ext,
+                               struct image_cfg_element *binarye,
+                               struct main_hdr_v1 *main_hdr)
 {
        struct opt_hdr_v1 *hdr = (struct opt_hdr_v1 *)*cur;
+       uint32_t base_addr;
        uint32_t add_args;
        uint32_t offset;
        uint32_t *args;
        size_t binhdrsz;
+       int cpu_sheeva;
        struct stat s;
        int argi;
        FILE *bin;
@@ -1091,13 +1298,22 @@ int add_binary_header_v1(uint8_t **cur, uint8_t **next_ext,
        *cur += (binarye->binary.nargs + 1) * sizeof(uint32_t);
 
        /*
-        * ARM executable code inside the BIN header on some mvebu platforms
-        * (e.g. A370, AXP) must always be aligned with the 128-bit boundary.
+        * ARM executable code inside the BIN header on platforms with Sheeva
+        * CPU (A370 and AXP) must always be aligned with the 128-bit boundary.
+        * In the case when this code is not position independent (e.g. ARM
+        * SPL), it must be placed at fixed load and execute address.
         * This requirement can be met by inserting dummy arguments into
         * BIN header, if needed.
         */
+       cpu_sheeva = image_is_cpu_sheeva();
+       base_addr = cpu_sheeva ? 0x40004000 : 0x40000000;
        offset = *cur - (uint8_t *)main_hdr;
-       add_args = ((16 - offset % 16) % 16) / sizeof(uint32_t);
+       if (binarye->binary.loadaddr)
+               add_args = (binarye->binary.loadaddr - base_addr - offset) / sizeof(uint32_t);
+       else if (cpu_sheeva)
+               add_args = ((16 - offset % 16) % 16) / sizeof(uint32_t);
+       else
+               add_args = 0;
        if (add_args) {
                *(args - 1) = cpu_to_le32(binarye->binary.nargs + add_args);
                *cur += add_args * sizeof(uint32_t);
@@ -1135,7 +1351,7 @@ err_close:
        return -1;
 }
 
-int export_pub_kak_hash(RSA *kak, struct secure_hdr_v1 *secure_hdr)
+static int export_pub_kak_hash(RSA *kak, struct secure_hdr_v1 *secure_hdr)
 {
        FILE *hashf;
        int res;
@@ -1154,8 +1370,8 @@ int export_pub_kak_hash(RSA *kak, struct secure_hdr_v1 *secure_hdr)
        return res < 0 ? 1 : 0;
 }
 
-int kwb_sign_csk_with_kak(struct image_tool_params *params,
-                         struct secure_hdr_v1 *secure_hdr, RSA *csk)
+static int kwb_sign_csk_with_kak(struct image_tool_params *params,
+                                struct secure_hdr_v1 *secure_hdr, RSA *csk)
 {
        RSA *kak = NULL;
        RSA *kak_pub = NULL;
@@ -1196,16 +1412,14 @@ int kwb_sign_csk_with_kak(struct image_tool_params *params,
        return 0;
 }
 
-int add_secure_header_v1(struct image_tool_params *params, uint8_t *ptr,
-                        int payloadsz, size_t headersz, uint8_t *image,
-                        struct secure_hdr_v1 *secure_hdr)
+static int add_secure_header_v1(struct image_tool_params *params, uint8_t *image_ptr,
+                               size_t image_size, uint8_t *header_ptr, size_t headersz,
+                               struct secure_hdr_v1 *secure_hdr)
 {
        struct image_cfg_element *e_jtagdelay;
        struct image_cfg_element *e_boxid;
        struct image_cfg_element *e_flashid;
        RSA *csk = NULL;
-       unsigned char *image_ptr;
-       size_t image_size;
        struct sig_v1 tmp_sig;
        bool specialized_img = image_get_spezialized_img();
 
@@ -1231,14 +1445,11 @@ int add_secure_header_v1(struct image_tool_params *params, uint8_t *ptr,
        if (kwb_sign_csk_with_kak(params, secure_hdr, csk))
                return 1;
 
-       image_ptr = ptr + headersz;
-       image_size = payloadsz - headersz;
-
-       if (kwb_sign_and_verify(csk, image_ptr, image_size,
+       if (kwb_sign_and_verify(csk, image_ptr, image_size - 4,
                                &secure_hdr->imgsig, "image") < 0)
                return 1;
 
-       if (kwb_sign_and_verify(csk, image, headersz, &tmp_sig, "header") < 0)
+       if (kwb_sign_and_verify(csk, header_ptr, headersz, &tmp_sig, "header") < 0)
                return 1;
 
        secure_hdr->hdrsig = tmp_sig;
@@ -1248,27 +1459,44 @@ int add_secure_header_v1(struct image_tool_params *params, uint8_t *ptr,
        return 0;
 }
 
-static void *image_create_v1(size_t *imagesz, struct image_tool_params *params,
+static void finish_register_set_header_v1(uint8_t **cur, uint8_t **next_ext,
+                                         struct register_set_hdr_v1 *register_set_hdr,
+                                         int *datai, uint8_t delay)
+{ /* Close the register set header holding *datai entries and advance *cur past it. */
+       int size = sizeof(struct register_set_hdr_v1) + 8 * (*datai) + 4; /* 8 per entry; +4 presumably the last entry - confirm */
+
+       register_set_hdr->headertype = OPT_HDR_V1_REGISTER_TYPE;
+       register_set_hdr->headersz_lsb = cpu_to_le16(size & 0xFFFF);
+       register_set_hdr->headersz_msb = size >> 16;
+       register_set_hdr->data[*datai].last_entry.delay = delay; /* slot right after the stored entries */
+       *cur += size;
+       **next_ext = 1; /* set the previously tracked 'next' byte: another header follows */
+       *next_ext = &register_set_hdr->data[*datai].last_entry.next; /* now track this header's own 'next' byte */
+       *datai = 0; /* entry count restarts for the next register set header */
+}
+
+static void *image_create_v1(size_t *dataoff, struct image_tool_params *params,
                             uint8_t *ptr, int payloadsz)
 {
        struct image_cfg_element *e;
        struct main_hdr_v1 *main_hdr;
-       struct opt_hdr_v1 *ohdr;
        struct register_set_hdr_v1 *register_set_hdr;
        struct secure_hdr_v1 *secure_hdr = NULL;
        size_t headersz;
        uint8_t *image, *cur;
        int hasext = 0;
        uint8_t *next_ext = NULL;
-       int cfgi, datai, size;
+       int cfgi, datai;
+       uint8_t delay;
 
        /*
-        * Calculate the size of the header and the size of the
+        * Calculate the size of the header and the offset of the
         * payload
         */
        headersz = image_headersz_v1(&hasext);
        if (headersz == 0)
                return NULL;
+       *dataoff = image_headersz_align(headersz, image_get_bootfrom());
 
        image = malloc(headersz);
        if (!image) {
@@ -1290,7 +1518,7 @@ static void *image_create_v1(size_t *imagesz, struct image_tool_params *params,
        main_hdr->headersz_msb = (headersz & 0xFFFF0000) >> 16;
        main_hdr->destaddr     = cpu_to_le32(params->addr);
        main_hdr->execaddr     = cpu_to_le32(params->ep);
-       main_hdr->srcaddr      = cpu_to_le32(headersz);
+       main_hdr->srcaddr      = cpu_to_le32(*dataoff);
        main_hdr->ext          = hasext;
        main_hdr->version      = 1;
        main_hdr->blockid      = image_get_bootfrom();
@@ -1317,28 +1545,24 @@ static void *image_create_v1(size_t *imagesz, struct image_tool_params *params,
        if (e)
                main_hdr->flags = e->debug ? 0x1 : 0;
 
-       /*
-        * For SATA srcaddr is specified in number of sectors starting from
-        * sector 0. The main header is stored at sector number 1.
-        * This expects the sector size to be 512 bytes.
-        * Header size is already aligned.
-        */
-       if (main_hdr->blockid == IBR_HDR_SATA_ID)
-               main_hdr->srcaddr = cpu_to_le32(headersz / 512 + 1);
-
-       /*
-        * For SDIO srcaddr is specified in number of sectors starting from
-        * sector 0. The main header is stored at sector number 0.
-        * This expects sector size to be 512 bytes.
-        * Header size is already aligned.
-        */
-       if (main_hdr->blockid == IBR_HDR_SDIO_ID)
-               main_hdr->srcaddr = cpu_to_le32(headersz / 512);
+       /* For SATA srcaddr is specified in number of sectors. */
+       if (main_hdr->blockid == IBR_HDR_SATA_ID) {
+               params->bl_len = image_get_satablksz();
+               main_hdr->srcaddr = cpu_to_le32(le32_to_cpu(main_hdr->srcaddr) / params->bl_len);
+       }
 
        /* For PCIe srcaddr is not used and must be set to 0xFFFFFFFF. */
        if (main_hdr->blockid == IBR_HDR_PEX_ID)
                main_hdr->srcaddr = cpu_to_le32(0xFFFFFFFF);
 
+       if (params->xflag) {
+               if (!image_fill_xip_header(main_hdr, params)) {
+                       free(image);
+                       return NULL;
+               }
+               *dataoff = le32_to_cpu(main_hdr->srcaddr);
+       }
+
        if (image_get_csk_index() >= 0) {
                /*
                 * only reserve the space here; we fill the header later since
@@ -1351,71 +1575,66 @@ static void *image_create_v1(size_t *imagesz, struct image_tool_params *params,
        }
 
        datai = 0;
-       register_set_hdr = (struct register_set_hdr_v1 *)cur;
        for (cfgi = 0; cfgi < cfgn; cfgi++) {
                e = &image_cfg[cfgi];
                if (e->type != IMAGE_CFG_DATA &&
-                   e->type != IMAGE_CFG_DATA_DELAY)
-                       continue;
-               if (e->type == IMAGE_CFG_DATA_DELAY) {
-                       size = sizeof(struct register_set_hdr_v1) + 8 * datai + 4;
-                       register_set_hdr->headertype = OPT_HDR_V1_REGISTER_TYPE;
-                       register_set_hdr->headersz_lsb = cpu_to_le16(size & 0xFFFF);
-                       register_set_hdr->headersz_msb = size >> 16;
-                       register_set_hdr->data[datai].last_entry.delay = e->regdata_delay;
-                       cur += size;
-                       *next_ext = 1;
-                       next_ext = &register_set_hdr->data[datai].last_entry.next;
-                       datai = 0;
+                   e->type != IMAGE_CFG_DATA_DELAY &&
+                   e->type != IMAGE_CFG_BINARY)
                        continue;
+
+               if (datai == 0)
+                       register_set_hdr = (struct register_set_hdr_v1 *)cur;
+
+               /* If delay is not specified, use the smallest possible value. */
+               if (e->type == IMAGE_CFG_DATA_DELAY)
+                       delay = e->regdata_delay;
+               else
+                       delay = REGISTER_SET_HDR_OPT_DELAY_MS(0);
+
+               /*
+                * DATA_DELAY command is the last entry in the register set
+                * header and BINARY command inserts a new binary header.
+                * Therefore BINARY command requires finishing the register set
+                * header if some DATA command was specified. And DATA_DELAY
+                * command automatically finishes register set header even when
+                * there was no DATA command.
+                */
+               if (e->type == IMAGE_CFG_DATA_DELAY ||
+                   (e->type == IMAGE_CFG_BINARY && datai != 0))
+                       finish_register_set_header_v1(&cur, &next_ext, register_set_hdr,
+                                                     &datai, delay);
+
+               if (e->type == IMAGE_CFG_DATA) {
+                       register_set_hdr->data[datai].entry.address =
+                               cpu_to_le32(e->regdata.raddr);
+                       register_set_hdr->data[datai].entry.value =
+                               cpu_to_le32(e->regdata.rdata);
+                       datai++;
+               }
+
+               if (e->type == IMAGE_CFG_BINARY) {
+                       if (add_binary_header_v1(&cur, &next_ext, e, main_hdr))
+                               return NULL;
                }
-               register_set_hdr->data[datai].entry.address =
-                       cpu_to_le32(e->regdata.raddr);
-               register_set_hdr->data[datai].entry.value =
-                       cpu_to_le32(e->regdata.rdata);
-               datai++;
        }
        if (datai != 0) {
-               size = sizeof(struct register_set_hdr_v1) + 8 * datai + 4;
-               register_set_hdr->headertype = OPT_HDR_V1_REGISTER_TYPE;
-               register_set_hdr->headersz_lsb = cpu_to_le16(size & 0xFFFF);
-               register_set_hdr->headersz_msb = size >> 16;
-               /* Set delay to the smallest possible value 1ms. */
-               register_set_hdr->data[datai].last_entry.delay = 1;
-               cur += size;
-               *next_ext = 1;
-               next_ext = &register_set_hdr->data[datai].last_entry.next;
+               /* Set delay to the smallest possible value. */
+               delay = REGISTER_SET_HDR_OPT_DELAY_MS(0);
+               finish_register_set_header_v1(&cur, &next_ext, register_set_hdr,
+                                             &datai, delay);
        }
 
-       for (cfgi = 0; cfgi < cfgn; cfgi++) {
-               e = &image_cfg[cfgi];
-               if (e->type != IMAGE_CFG_BINARY)
-                       continue;
-
-               if (add_binary_header_v1(&cur, &next_ext, e, main_hdr))
-                       return NULL;
-       }
-
-       if (secure_hdr && add_secure_header_v1(params, ptr, payloadsz + headersz,
-                                              headersz, image, secure_hdr))
+       if (secure_hdr && add_secure_header_v1(params, ptr + *dataoff, payloadsz,
+                                              image, headersz, secure_hdr))
                return NULL;
 
        /* Calculate and set the header checksum */
        main_hdr->checksum = image_checksum8(main_hdr, headersz);
 
-       *imagesz = headersz;
-
-       /* Fill the real header size without padding into the main header */
-       headersz = sizeof(*main_hdr);
-       for_each_opt_hdr_v1 (ohdr, main_hdr)
-               headersz += opt_hdr_v1_size(ohdr);
-       main_hdr->headersz_lsb = cpu_to_le16(headersz & 0xFFFF);
-       main_hdr->headersz_msb = (headersz & 0xFFFF0000) >> 16;
-
        return image;
 }
 
-int recognize_keyword(char *keyword)
+static int recognize_keyword(char *keyword)
 {
        int kw_id;
 
@@ -1455,6 +1674,18 @@ static int image_create_config_parse_oneline(char *line,
        case IMAGE_CFG_VERSION:
                el->version = atoi(value1);
                break;
+       case IMAGE_CFG_CPU:
+               if (strcmp(value1, "FEROCEON") == 0)
+                       el->cpu_sheeva = 0;
+               else if (strcmp(value1, "SHEEVA") == 0)
+                       el->cpu_sheeva = 1;
+               else if (strcmp(value1, "A9") == 0)
+                       el->cpu_sheeva = 0;
+               else {
+                       fprintf(stderr, "Invalid CPU %s\n", value1);
+                       return -1;
+               }
+               break;
        case IMAGE_CFG_BOOT_FROM:
                ret = image_boot_mode_id(value1);
 
@@ -1482,16 +1713,53 @@ static int image_create_config_parse_oneline(char *line,
        case IMAGE_CFG_NAND_PAGESZ:
                el->nandpagesz = strtoul(value1, NULL, 16);
                break;
+       case IMAGE_CFG_SATA_BLKSZ:
+               el->satablksz = strtoul(value1, NULL, 0);
+               if (el->satablksz & (el->satablksz-1)) {
+                       fprintf(stderr, "Invalid SATA block size '%s'\n", value1);
+                       return -1;
+               }
+               break;
        case IMAGE_CFG_BINARY:
                argi = 0;
 
                el->binary.file = strdup(value1);
                while (1) {
                        char *value = strtok_r(NULL, delimiters, &saveptr);
+                       char *endptr;
 
                        if (!value)
                                break;
-                       el->binary.args[argi] = strtoul(value, NULL, 16);
+
+                       if (!strcmp(value, "LOAD_ADDRESS")) {
+                               value = strtok_r(NULL, delimiters, &saveptr);
+                               if (!value) {
+                                       fprintf(stderr,
+                                               "Missing address argument for BINARY LOAD_ADDRESS\n");
+                                       return -1;
+                               }
+                               el->binary.loadaddr = strtoul(value, &endptr, 16);
+                               if (*endptr) {
+                                       fprintf(stderr,
+                                               "Invalid argument '%s' for BINARY LOAD_ADDRESS\n",
+                                               value);
+                                       return -1;
+                               }
+                               value = strtok_r(NULL, delimiters, &saveptr);
+                               if (value) {
+                                       fprintf(stderr,
+                                               "Unexpected argument '%s' after BINARY LOAD_ADDRESS\n",
+                                               value);
+                                       return -1;
+                               }
+                               break;
+                       }
+
+                       el->binary.args[argi] = strtoul(value, &endptr, 16);
+                       if (*endptr) {
+                               fprintf(stderr, "Invalid argument '%s' for BINARY\n", value);
+                               return -1;
+                       }
                        argi++;
                        if (argi >= BINARY_MAX_ARGS) {
                                fprintf(stderr,
@@ -1518,6 +1786,10 @@ static int image_create_config_parse_oneline(char *line,
                        el->regdata_delay = REGISTER_SET_HDR_OPT_DELAY_SDRAM_SETUP;
                else
                        el->regdata_delay = REGISTER_SET_HDR_OPT_DELAY_MS(strtoul(value1, NULL, 10));
+               if (el->regdata_delay > 255) {
+                       fprintf(stderr, "Maximal DATA_DELAY is 255\n");
+                       return -1;
+               }
                break;
        case IMAGE_CFG_BAUDRATE:
                el->baudrate = strtoul(value1, NULL, 10);
@@ -1633,17 +1905,21 @@ static void kwbimage_set_header(void *ptr, struct stat *sbuf, int ifd,
        FILE *fcfg;
        void *image = NULL;
        int version;
-       size_t headersz = 0;
+       size_t dataoff = 0;
        size_t datasz;
        uint32_t checksum;
        struct stat s;
        int ret;
 
+       params->bl_len = 1;
+
        /*
         * Do not use sbuf->st_size as it contains size with padding.
         * We need original image data size, so stat original file.
         */
-       if (stat(params->datafile, &s)) {
+       if (params->skipcpy) {
+               s.st_size = 0;
+       } else if (stat(params->datafile, &s)) {
                fprintf(stderr, "Could not stat data file %s: %s\n",
                        params->datafile, strerror(errno));
                exit(EXIT_FAILURE);
@@ -1684,11 +1960,11 @@ static void kwbimage_set_header(void *ptr, struct stat *sbuf, int ifd,
                 */
        case -1:
        case 0:
-               image = image_create_v0(&headersz, params, datasz + 4);
+               image = image_create_v0(&dataoff, params, datasz + 4);
                break;
 
        case 1:
-               image = image_create_v1(&headersz, params, ptr, datasz + 4);
+               image = image_create_v1(&dataoff, params, ptr, datasz + 4);
                break;
 
        default:
@@ -1706,19 +1982,20 @@ static void kwbimage_set_header(void *ptr, struct stat *sbuf, int ifd,
        free(image_cfg);
 
        /* Build and add image data checksum */
-       checksum = cpu_to_le32(image_checksum32((uint8_t *)ptr + headersz,
+       checksum = cpu_to_le32(image_checksum32((uint8_t *)ptr + dataoff,
                                                datasz));
-       memcpy((uint8_t *)ptr + headersz + datasz, &checksum, sizeof(uint32_t));
+       memcpy((uint8_t *)ptr + dataoff + datasz, &checksum, sizeof(uint32_t));
 
        /* Finally copy the header into the image area */
-       memcpy(ptr, image, headersz);
+       memcpy(ptr, image, kwbheader_size(image));
 
        free(image);
 }
 
-static void kwbimage_print_header(const void *ptr)
+static void kwbimage_print_header(const void *ptr, struct image_tool_params *params)
 {
        struct main_hdr_v0 *mhdr = (struct main_hdr_v0 *)ptr;
+       struct bin_hdr_v0 *bhdr;
        struct opt_hdr_v1 *ohdr;
 
        printf("Image Type:   MVEBU Boot from %s Image\n",
@@ -1727,16 +2004,38 @@ static void kwbimage_print_header(const void *ptr)
 
        for_each_opt_hdr_v1 (ohdr, mhdr) {
                if (ohdr->headertype == OPT_HDR_V1_BINARY_TYPE) {
-                       printf("BIN Hdr Size: ");
+                       printf("BIN Img Size: ");
                        genimg_print_size(opt_hdr_v1_size(ohdr) - 12 -
                                          4 * ohdr->data[0]);
+                       printf("BIN Img Offs: ");
+                       genimg_print_size(((uint8_t *)ohdr - (uint8_t *)mhdr) +
+                                         8 + 4 * ohdr->data[0]);
                }
        }
 
+       for_each_bin_hdr_v0(bhdr, mhdr) {
+               printf("BIN Img Size: ");
+               genimg_print_size(le32_to_cpu(bhdr->size));
+               printf("BIN Img Addr: %08x\n", le32_to_cpu(bhdr->destaddr));
+               printf("BIN Img Entr: %08x\n", le32_to_cpu(bhdr->execaddr));
+       }
+
        printf("Data Size:    ");
-       genimg_print_size(mhdr->blocksize - sizeof(uint32_t));
-       printf("Load Address: %08x\n", mhdr->destaddr);
-       printf("Entry Point:  %08x\n", mhdr->execaddr);
+       genimg_print_size(le32_to_cpu(mhdr->blocksize) - sizeof(uint32_t));
+       printf("Data Offset:  ");
+       if (mhdr->blockid == IBR_HDR_SATA_ID)
+               printf("%u Sector%s (LBA) = ", le32_to_cpu(mhdr->srcaddr),
+                      le32_to_cpu(mhdr->srcaddr) != 1 ? "s" : "");
+       genimg_print_size(le32_to_cpu(mhdr->srcaddr) * params->bl_len);
+       if (mhdr->blockid == IBR_HDR_SATA_ID)
+               printf("Sector Size:  %u Bytes\n", params->bl_len);
+       if (mhdr->blockid == IBR_HDR_SPI_ID && le32_to_cpu(mhdr->destaddr) == 0xFFFFFFFF) {
+               printf("Load Address: XIP\n");
+               printf("Execute Offs: %08x\n", le32_to_cpu(mhdr->execaddr));
+       } else {
+               printf("Load Address: %08x\n", le32_to_cpu(mhdr->destaddr));
+               printf("Entry Point:  %08x\n", le32_to_cpu(mhdr->execaddr));
+       }
 }
 
 static int kwbimage_check_image_types(uint8_t type)
@@ -1755,6 +2054,10 @@ static int kwbimage_verify_header(unsigned char *ptr, int image_size,
        uint32_t offset;
        uint32_t size;
        uint8_t csum;
+       int blksz;
+
+       if (header_size > 192*1024)
+               return -FDT_ERR_BADSTRUCTURE;
 
        if (header_size > image_size)
                return -FDT_ERR_BADSTRUCTURE;
@@ -1765,15 +2068,31 @@ static int kwbimage_verify_header(unsigned char *ptr, int image_size,
        /* Only version 0 extended header has checksum */
        if (kwbimage_version(ptr) == 0) {
                struct main_hdr_v0 *mhdr = (struct main_hdr_v0 *)ptr;
+               struct ext_hdr_v0 *ext_hdr;
+               struct bin_hdr_v0 *bhdr;
 
-               if (mhdr->ext & 0x1) {
-                       struct ext_hdr_v0 *ext_hdr = (void *)(mhdr + 1);
-
+               for_each_ext_hdr_v0(ext_hdr, ptr) {
                        csum = image_checksum8(ext_hdr, sizeof(*ext_hdr) - 1);
                        if (csum != ext_hdr->checksum)
                                return -FDT_ERR_BADSTRUCTURE;
                }
 
+               for_each_bin_hdr_v0(bhdr, ptr) {
+                       csum = image_checksum8(bhdr, (uint8_t *)&bhdr->checksum - (uint8_t *)bhdr - 1);
+                       if (csum != bhdr->checksum)
+                               return -FDT_ERR_BADSTRUCTURE;
+
+                       if (bhdr->offset > sizeof(*bhdr) || bhdr->offset % 4 != 0)
+                               return -FDT_ERR_BADSTRUCTURE;
+
+                       if (bhdr->offset + bhdr->size + 4 > sizeof(*bhdr) || bhdr->size % 4 != 0)
+                               return -FDT_ERR_BADSTRUCTURE;
+
+                       if (image_checksum32((uint8_t *)bhdr + bhdr->offset, bhdr->size) !=
+                           *(uint32_t *)((uint8_t *)bhdr + bhdr->offset + bhdr->size))
+                               return -FDT_ERR_BADSTRUCTURE;
+               }
+
                blockid = mhdr->blockid;
                offset = le32_to_cpu(mhdr->srcaddr);
                size = le32_to_cpu(mhdr->blocksize);
@@ -1794,26 +2113,28 @@ static int kwbimage_verify_header(unsigned char *ptr, int image_size,
                return -FDT_ERR_BADSTRUCTURE;
        }
 
+       if (size < 4 || size % 4 != 0)
+               return -FDT_ERR_BADSTRUCTURE;
+
        /*
         * For SATA srcaddr is specified in number of sectors.
-        * The main header is must be stored at sector number 1.
-        * This expects that sector size is 512 bytes and recalculates
-        * data offset to bytes relative to the main header.
+        * Try all possible sector sizes that are powers of two,
+        * from 512 bytes up to 32 kB.
         */
        if (blockid == IBR_HDR_SATA_ID) {
-               if (offset < 1)
-                       return -FDT_ERR_BADSTRUCTURE;
-               offset -= 1;
-               offset *= 512;
-       }
+               for (blksz = 512; blksz < 0x10000; blksz *= 2) {
+                       if (offset * blksz > image_size || offset * blksz + size > image_size)
+                               break;
 
-       /*
-        * For SDIO srcaddr is specified in number of sectors.
-        * This expects that sector size is 512 bytes and recalculates
-        * data offset to bytes.
-        */
-       if (blockid == IBR_HDR_SDIO_ID)
-               offset *= 512;
+                       if (image_checksum32(ptr + offset * blksz, size - 4) ==
+                           *(uint32_t *)(ptr + offset * blksz + size - 4)) {
+                               params->bl_len = blksz;
+                               return 0;
+                       }
+               }
+
+               return -FDT_ERR_BADSTRUCTURE;
+       }
 
        /*
         * For PCIe srcaddr is always set to 0xFFFFFFFF.
@@ -1822,16 +2143,14 @@ static int kwbimage_verify_header(unsigned char *ptr, int image_size,
        if (blockid == IBR_HDR_PEX_ID && offset == 0xFFFFFFFF)
                offset = header_size;
 
-       if (offset > image_size || offset % 4 != 0)
-               return -FDT_ERR_BADSTRUCTURE;
-
-       if (size < 4 || offset + size > image_size || size % 4 != 0)
+       if (offset % 4 != 0 || offset > image_size || offset + size > image_size)
                return -FDT_ERR_BADSTRUCTURE;
 
        if (image_checksum32(ptr + offset, size - 4) !=
            *(uint32_t *)(ptr + offset + size - 4))
                return -FDT_ERR_BADSTRUCTURE;
 
+       params->bl_len = 1;
        return 0;
 }
 
@@ -1845,6 +2164,8 @@ static int kwbimage_generate(struct image_tool_params *params,
        int version;
        void *hdr;
        int ret;
+       int align, size;
+       unsigned int satablksz;
 
        fcfg = fopen(params->imagename, "r");
        if (!fcfg) {
@@ -1853,7 +2174,9 @@ static int kwbimage_generate(struct image_tool_params *params,
                exit(EXIT_FAILURE);
        }
 
-       if (stat(params->datafile, &s)) {
+       if (params->skipcpy) {
+               s.st_size = 0;
+       } else if (stat(params->datafile, &s)) {
                fprintf(stderr, "Could not stat data file %s: %s\n",
                        params->datafile, strerror(errno));
                exit(EXIT_FAILURE);
@@ -1880,6 +2203,7 @@ static int kwbimage_generate(struct image_tool_params *params,
 
        bootfrom = image_get_bootfrom();
        version = image_get_version();
+       satablksz = image_get_satablksz();
        switch (version) {
                /*
                 * Fallback to version 0 if no version is provided in the
@@ -1892,6 +2216,15 @@ static int kwbimage_generate(struct image_tool_params *params,
 
        case 1:
                alloc_len = image_headersz_v1(NULL);
+               if (!alloc_len) {
+                       free(image_cfg);
+                       exit(EXIT_FAILURE);
+               }
+               if (alloc_len > 192*1024) {
+                       fprintf(stderr, "Header is too big (%u bytes), maximal kwbimage header size is %u bytes\n", alloc_len, 192*1024);
+                       free(image_cfg);
+                       exit(EXIT_FAILURE);
+               }
                break;
 
        default:
@@ -1900,6 +2233,8 @@ static int kwbimage_generate(struct image_tool_params *params,
                exit(EXIT_FAILURE);
        }
 
+       alloc_len = image_headersz_align(alloc_len, image_get_bootfrom());
+
        free(image_cfg);
 
        hdr = malloc(alloc_len);
@@ -1914,88 +2249,346 @@ static int kwbimage_generate(struct image_tool_params *params,
        tparams->hdr = hdr;
 
        /*
-        * The resulting image needs to be 4-byte aligned. At least
-        * the Marvell hdrparser tool complains if its unaligned.
-        * After the image data is stored 4-byte checksum.
-        * Final UART image must be aligned to 128 bytes.
+        * Final SATA images must be aligned to disk block size.
+        * Final SDIO images must be aligned to 512 bytes.
         * Final SPI and NAND images must be aligned to 256 bytes.
-        * Final SATA and SDIO images must be aligned to 512 bytes.
+        * Final UART image must be aligned to 128 bytes.
         */
-       if (bootfrom == IBR_HDR_SPI_ID || bootfrom == IBR_HDR_NAND_ID)
-               return 4 + (256 - (alloc_len + s.st_size + 4) % 256) % 256;
-       else if (bootfrom == IBR_HDR_SATA_ID || bootfrom == IBR_HDR_SDIO_ID)
-               return 4 + (512 - (alloc_len + s.st_size + 4) % 512) % 512;
+       if (bootfrom == IBR_HDR_SATA_ID)
+               align = satablksz;
+       else if (bootfrom == IBR_HDR_SDIO_ID)
+               align = 512;
+       else if (bootfrom == IBR_HDR_SPI_ID || bootfrom == IBR_HDR_NAND_ID)
+               align = 256;
        else if (bootfrom == IBR_HDR_UART_ID)
-               return 4 + (128 - (alloc_len + s.st_size + 4) % 128) % 128;
+               align = 128;
        else
-               return 4 + (4 - s.st_size % 4) % 4;
+               align = 4;
+
+       /*
+        * The resulting image needs to be 4-byte aligned. At least
+        * the Marvell hdrparser tool complains if it's unaligned.
+        * After the image data is stored 4-byte checksum.
+        */
+       size = 4 + (align - (alloc_len + s.st_size + 4) % align) % align;
+
+       /*
+        * This function should return aligned size of the datafile.
+        * When skipcpy is set (datafile is skipped) then return value of this
+        * function is ignored, so we have to put required kwbimage aligning
+        * into the preallocated header size.
+        */
+       if (params->skipcpy) {
+               tparams->header_size += size;
+               return 0;
+       } else {
+               return size;
+       }
+}
+
+static int kwbimage_generate_config(void *ptr, struct image_tool_params *params)
+{
+       struct main_hdr_v0 *mhdr0 = (struct main_hdr_v0 *)ptr;
+       struct main_hdr_v1 *mhdr = (struct main_hdr_v1 *)ptr;
+       size_t header_size = kwbheader_size(ptr);
+       struct register_set_hdr_v1 *regset_hdr;
+       struct ext_hdr_v0_reg *regdata;
+       struct ext_hdr_v0 *ehdr0;
+       struct bin_hdr_v0 *bhdr0;
+       struct opt_hdr_v1 *ohdr;
+       int regset_count;
+       int params_count;
+       unsigned offset;
+       int is_v0_ext;
+       int cur_idx;
+       int version;
+       FILE *f;
+       int i;
+
+       f = fopen(params->outfile, "w");
+       if (!f) {
+               fprintf(stderr, "Can't open \"%s\": %s\n", params->outfile, strerror(errno));
+               return -1;
+       }
+
+       version = kwbimage_version(ptr);
+
+       is_v0_ext = 0;
+       if (version == 0) {
+               if (mhdr0->ext > 1 || mhdr0->bin ||
+                   ((ehdr0 = ext_hdr_v0_first(ptr)) &&
+                    (ehdr0->match_addr || ehdr0->match_mask || ehdr0->match_value)))
+                       is_v0_ext = 1;
+       }
+
+       if (version != 0)
+               fprintf(f, "VERSION %d\n", version);
+
+       fprintf(f, "BOOT_FROM %s\n", image_boot_mode_name(mhdr->blockid) ?: "<unknown>");
+
+       if (version == 0 && mhdr->blockid == IBR_HDR_NAND_ID)
+               fprintf(f, "NAND_ECC_MODE %s\n", image_nand_ecc_mode_name(mhdr0->nandeccmode));
+
+       if (mhdr->blockid == IBR_HDR_NAND_ID)
+               fprintf(f, "NAND_PAGE_SIZE 0x%x\n", (unsigned)le16_to_cpu(mhdr->nandpagesize));
+
+       if (mhdr->blockid == IBR_HDR_NAND_ID && (version != 0 || is_v0_ext || mhdr->nandblocksize != 0)) {
+               if (mhdr->nandblocksize != 0) /* block size explicitly set in 64 kB unit */
+                       fprintf(f, "NAND_BLKSZ 0x%x\n", (unsigned)mhdr->nandblocksize * 64*1024);
+               else if (le16_to_cpu(mhdr->nandpagesize) > 512)
+                       fprintf(f, "NAND_BLKSZ 0x10000\n"); /* large page NAND flash = 64 kB block size */
+               else
+                       fprintf(f, "NAND_BLKSZ 0x4000\n"); /* small page NAND flash = 16 kB block size */
+       }
+
+       if (mhdr->blockid == IBR_HDR_NAND_ID && (version != 0 || is_v0_ext))
+               fprintf(f, "NAND_BADBLK_LOCATION 0x%x\n", (unsigned)mhdr->nandbadblklocation);
+
+       if (version == 0 && mhdr->blockid == IBR_HDR_SATA_ID)
+               fprintf(f, "SATA_PIO_MODE %u\n", (unsigned)mhdr0->satapiomode);
+
+       if (mhdr->blockid == IBR_HDR_SATA_ID)
+               fprintf(f, "SATA_BLKSZ %u\n", params->bl_len);
+
+       /*
+        * Addresses and sizes which are specified by mkimage command line
+        * arguments and not in kwbimage config file
+        */
+
+       if (version != 0)
+               fprintf(f, "#HEADER_SIZE 0x%x\n",
+                       ((unsigned)mhdr->headersz_msb << 8) | le16_to_cpu(mhdr->headersz_lsb));
+
+       fprintf(f, "#SRC_ADDRESS 0x%x\n", le32_to_cpu(mhdr->srcaddr));
+       fprintf(f, "#BLOCK_SIZE 0x%x\n", le32_to_cpu(mhdr->blocksize));
+       fprintf(f, "#DEST_ADDRESS 0x%08x\n", le32_to_cpu(mhdr->destaddr));
+       fprintf(f, "#EXEC_ADDRESS 0x%08x\n", le32_to_cpu(mhdr->execaddr));
+
+       if (version != 0) {
+               if (options_to_baudrate(mhdr->options))
+                       fprintf(f, "BAUDRATE %u\n", options_to_baudrate(mhdr->options));
+               if (options_to_baudrate(mhdr->options) ||
+                   ((mhdr->options >> 3) & 0x3) || ((mhdr->options >> 5) & 0x7)) {
+                       fprintf(f, "UART_PORT %u\n", (unsigned)((mhdr->options >> 3) & 0x3));
+                       fprintf(f, "UART_MPP 0x%x\n", (unsigned)((mhdr->options >> 5) & 0x7));
+               }
+               if (mhdr->flags & 0x1)
+                       fprintf(f, "DEBUG 1\n");
+       }
+
+       cur_idx = 1;
+       for_each_opt_hdr_v1(ohdr, ptr) {
+               if (ohdr->headertype == OPT_HDR_V1_SECURE_TYPE) {
+                       fprintf(f, "#SECURE_HEADER\n");
+               } else if (ohdr->headertype == OPT_HDR_V1_BINARY_TYPE) {
+                       fprintf(f, "BINARY binary%d.bin", cur_idx);
+                       for (i = 0; i < ohdr->data[0]; i++)
+                               fprintf(f, " 0x%x", le32_to_cpu(((uint32_t *)ohdr->data)[i + 1]));
+                       offset = (unsigned)((uint8_t *)ohdr - (uint8_t *)mhdr) + 8 + 4 * ohdr->data[0];
+                       fprintf(f, " LOAD_ADDRESS 0x%08x\n", 0x40000000 + offset);
+                       fprintf(f, " # for CPU SHEEVA: LOAD_ADDRESS 0x%08x\n", 0x40004000 + offset);
+                       cur_idx++;
+               } else if (ohdr->headertype == OPT_HDR_V1_REGISTER_TYPE) {
+                       regset_hdr = (struct register_set_hdr_v1 *)ohdr;
+                       if (opt_hdr_v1_size(ohdr) > sizeof(*ohdr))
+                               regset_count = (opt_hdr_v1_size(ohdr) - sizeof(*ohdr)) /
+                                              sizeof(regset_hdr->data[0].entry);
+                       else
+                               regset_count = 0;
+                       for (i = 0; i < regset_count; i++)
+                               fprintf(f, "DATA 0x%08x 0x%08x\n",
+                                       le32_to_cpu(regset_hdr->data[i].entry.address),
+                                       le32_to_cpu(regset_hdr->data[i].entry.value));
+                       if (regset_count > 0) {
+                               if (regset_hdr->data[regset_count-1].last_entry.delay !=
+                                   REGISTER_SET_HDR_OPT_DELAY_SDRAM_SETUP)
+                                       fprintf(f, "DATA_DELAY %u\n",
+                                               (unsigned)regset_hdr->data[regset_count-1].last_entry.delay);
+                               else
+                                       fprintf(f, "DATA_DELAY SDRAM_SETUP\n");
+                       }
+               }
+       }
+
+       if (version == 0 && !is_v0_ext && le16_to_cpu(mhdr0->ddrinitdelay))
+               fprintf(f, "DDR_INIT_DELAY %u\n", (unsigned)le16_to_cpu(mhdr0->ddrinitdelay));
+
+       for_each_ext_hdr_v0(ehdr0, ptr) {
+               if (is_v0_ext) {
+                       fprintf(f, "\nMATCH ADDRESS 0x%08x MASK 0x%08x VALUE 0x%08x\n",
+                               le32_to_cpu(ehdr0->match_addr),
+                               le32_to_cpu(ehdr0->match_mask),
+                               le32_to_cpu(ehdr0->match_value));
+                       if (ehdr0->rsvd1[0] || ehdr0->rsvd1[1] || ehdr0->rsvd1[2] ||
+                           ehdr0->rsvd1[3] || ehdr0->rsvd1[4] || ehdr0->rsvd1[5] ||
+                           ehdr0->rsvd1[6] || ehdr0->rsvd1[7])
+                               fprintf(f, "#DDR_RSVD1 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
+                                       ehdr0->rsvd1[0], ehdr0->rsvd1[1], ehdr0->rsvd1[2],
+                                       ehdr0->rsvd1[3], ehdr0->rsvd1[4], ehdr0->rsvd1[5],
+                                       ehdr0->rsvd1[6], ehdr0->rsvd1[7]);
+                       if (ehdr0->rsvd2[0] || ehdr0->rsvd2[1] || ehdr0->rsvd2[2] ||
+                           ehdr0->rsvd2[3] || ehdr0->rsvd2[4] || ehdr0->rsvd2[5] ||
+                           ehdr0->rsvd2[6])
+                               fprintf(f, "#DDR_RSVD2 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
+                                       ehdr0->rsvd2[0], ehdr0->rsvd2[1], ehdr0->rsvd2[2],
+                                       ehdr0->rsvd2[3], ehdr0->rsvd2[4], ehdr0->rsvd2[5],
+                                       ehdr0->rsvd2[6]);
+                       if (ehdr0->ddrwritetype)
+                               fprintf(f, "DDR_WRITE_TYPE %u\n", (unsigned)ehdr0->ddrwritetype);
+                       if (ehdr0->ddrresetmpp)
+                               fprintf(f, "DDR_RESET_MPP 0x%x\n", (unsigned)ehdr0->ddrresetmpp);
+                       if (ehdr0->ddrclkenmpp)
+                               fprintf(f, "DDR_CLKEN_MPP 0x%x\n", (unsigned)ehdr0->ddrclkenmpp);
+                       if (ehdr0->ddrinitdelay)
+                               fprintf(f, "DDR_INIT_DELAY %u\n", (unsigned)ehdr0->ddrinitdelay);
+               }
+
+               if (ehdr0->offset) {
+                       for (regdata = (struct ext_hdr_v0_reg *)((uint8_t *)ptr + ehdr0->offset);
+                            (uint8_t *)regdata < (uint8_t *)ptr + header_size &&
+                            (regdata->raddr || regdata->rdata);
+                            regdata++)
+                               fprintf(f, "DATA 0x%08x 0x%08x\n", le32_to_cpu(regdata->raddr),
+                                       le32_to_cpu(regdata->rdata));
+                       if ((uint8_t *)regdata != (uint8_t *)ptr + ehdr0->offset)
+                               fprintf(f, "DATA 0x0 0x0\n");
+               }
+
+               if (le32_to_cpu(ehdr0->enddelay))
+                       fprintf(f, "DATA_DELAY %u\n", le32_to_cpu(ehdr0->enddelay));
+               else if (is_v0_ext)
+                       fprintf(f, "DATA_DELAY SDRAM_SETUP\n");
+       }
+
+       cur_idx = 1;
+       for_each_bin_hdr_v0(bhdr0, ptr) {
+               fprintf(f, "\nMATCH ADDRESS 0x%08x MASK 0x%08x VALUE 0x%08x\n",
+                       le32_to_cpu(bhdr0->match_addr),
+                       le32_to_cpu(bhdr0->match_mask),
+                       le32_to_cpu(bhdr0->match_value));
+
+               fprintf(f, "BINARY binary%d.bin", cur_idx);
+               params_count = fls4(bhdr0->params_flags & 0xF);
+               for (i = 0; i < params_count; i++)
+                       fprintf(f, " 0x%x", (bhdr0->params[i] & (1 << i)) ? bhdr0->params[i] : 0);
+               fprintf(f, " LOAD_ADDRESS 0x%08x", le32_to_cpu(bhdr0->destaddr));
+               fprintf(f, " EXEC_ADDRESS 0x%08x", le32_to_cpu(bhdr0->execaddr));
+               fprintf(f, "\n");
+
+               fprintf(f, "#BINARY_OFFSET 0x%x\n", le32_to_cpu(bhdr0->offset));
+               fprintf(f, "#BINARY_SIZE 0x%x\n", le32_to_cpu(bhdr0->size));
+
+               if (bhdr0->rsvd1)
+                       fprintf(f, "#BINARY_RSVD1 0x%x\n", (unsigned)bhdr0->rsvd1);
+               if (bhdr0->rsvd2)
+                       fprintf(f, "#BINARY_RSVD2 0x%x\n", (unsigned)bhdr0->rsvd2);
+
+               cur_idx++;
+       }
+
+       /* Undocumented reserved fields */
+
+       if (version == 0 && (mhdr0->rsvd1[0] || mhdr0->rsvd1[1] || mhdr0->rsvd1[2]))
+               fprintf(f, "#RSVD1 0x%x 0x%x 0x%x\n", (unsigned)mhdr0->rsvd1[0],
+                       (unsigned)mhdr0->rsvd1[1], (unsigned)mhdr0->rsvd1[2]);
+
+       if (version == 0 && le16_to_cpu(mhdr0->rsvd2))
+               fprintf(f, "#RSVD2 0x%x\n", (unsigned)le16_to_cpu(mhdr0->rsvd2));
+
+       if (version != 0 && mhdr->reserved4)
+               fprintf(f, "#RESERVED4 0x%x\n", (unsigned)mhdr->reserved4);
+
+       if (version != 0 && mhdr->reserved5)
+               fprintf(f, "#RESERVED5 0x%x\n", (unsigned)le16_to_cpu(mhdr->reserved5));
+
+       fclose(f);
+
+       return 0;
 }
 
 static int kwbimage_extract_subimage(void *ptr, struct image_tool_params *params)
 {
        struct main_hdr_v1 *mhdr = (struct main_hdr_v1 *)ptr;
        size_t header_size = kwbheader_size(ptr);
+       struct bin_hdr_v0 *bhdr;
        struct opt_hdr_v1 *ohdr;
        int idx = params->pflag;
-       int cur_idx = 0;
+       int cur_idx;
        uint32_t offset;
        ulong image;
        ulong size;
 
-       for_each_opt_hdr_v1 (ohdr, ptr) {
-               if (ohdr->headertype != OPT_HDR_V1_BINARY_TYPE)
-                       continue;
+       /* Generate kwbimage config file when '-p -1' is specified */
+       if (idx == -1)
+               return kwbimage_generate_config(ptr, params);
 
-               if (idx == cur_idx) {
-                       image = (ulong)&ohdr->data[4 + 4 * ohdr->data[0]];
-                       size = opt_hdr_v1_size(ohdr) - 12 - 4 * ohdr->data[0];
-                       goto extract;
-               }
+       image = 0;
+       size = 0;
 
-               ++cur_idx;
-       }
+       if (idx == 0) {
+               /* Extract data image when -p is not specified or when '-p 0' is specified */
+               offset = le32_to_cpu(mhdr->srcaddr);
 
-       if (idx != cur_idx) {
-               printf("Image %d is not present\n", idx);
-               return -1;
-       }
+               if (mhdr->blockid == IBR_HDR_SATA_ID)
+                       offset *= params->bl_len;
 
-       offset = le32_to_cpu(mhdr->srcaddr);
+               if (mhdr->blockid == IBR_HDR_PEX_ID && offset == 0xFFFFFFFF)
+                       offset = header_size;
 
-       if (mhdr->blockid == IBR_HDR_SATA_ID) {
-               offset -= 1;
-               offset *= 512;
-       }
+               image = (ulong)((uint8_t *)ptr + offset);
+               size = le32_to_cpu(mhdr->blocksize) - 4;
+       } else {
+               /* Extract N-th binary header executable image when other '-p N' is specified */
+               cur_idx = 1;
+               for_each_opt_hdr_v1(ohdr, ptr) {
+                       if (ohdr->headertype != OPT_HDR_V1_BINARY_TYPE)
+                               continue;
 
-       if (mhdr->blockid == IBR_HDR_SDIO_ID)
-               offset *= 512;
+                       if (idx == cur_idx) {
+                               image = (ulong)&ohdr->data[4 + 4 * ohdr->data[0]];
+                               size = opt_hdr_v1_size(ohdr) - 12 - 4 * ohdr->data[0];
+                               break;
+                       }
 
-       if (mhdr->blockid == IBR_HDR_PEX_ID && offset == 0xFFFFFFFF)
-               offset = header_size;
+                       ++cur_idx;
+               }
+               for_each_bin_hdr_v0(bhdr, ptr) {
+                       if (idx == cur_idx) {
+                               image = (ulong)bhdr + bhdr->offset;
+                               size = bhdr->size;
+                               break;
+                       }
+                       ++cur_idx;
+               }
 
-       image = (ulong)((uint8_t *)ptr + offset);
-       size = le32_to_cpu(mhdr->blocksize) - 4;
+               if (!image) {
+                       fprintf(stderr, "Argument -p %d is invalid\n", idx);
+                       fprintf(stderr, "Available subimages:\n");
+                       fprintf(stderr, " -p -1  - kwbimage config file\n");
+                       fprintf(stderr, " -p 0   - data image\n");
+                       if (cur_idx - 1 > 0)
+                               fprintf(stderr, " -p N   - Nth binary header image (totally: %d)\n",
+                                       cur_idx - 1);
+                       return -1;
+               }
+       }
 
-extract:
        return imagetool_save_subimage(params->outfile, image, size);
 }
 
-/*
- * Report Error if xflag is set in addition to default
- */
 static int kwbimage_check_params(struct image_tool_params *params)
 {
-       if (!params->iflag && (!params->imagename || !strlen(params->imagename))) {
+       if (!params->lflag && !params->iflag && !params->pflag &&
+           (!params->imagename || !strlen(params->imagename))) {
                char *msg = "Configuration file for kwbimage creation omitted";
 
                fprintf(stderr, "Error:%s - %s\n", params->cmdname, msg);
                return 1;
        }
 
-       return (params->dflag && (params->fflag || params->lflag)) ||
-               (params->fflag && (params->dflag || params->lflag)) ||
-               (params->lflag && (params->dflag || params->fflag)) ||
-               (params->xflag);
+       return (params->dflag && (params->fflag || params->lflag || params->skipcpy)) ||
+               (params->fflag) ||
+               (params->lflag && (params->dflag || params->fflag));
 }
 
 /*