]> git.baikalelectronics.ru Git - uboot.git/commitdiff
tools: kwboot: Support higher baudrates when booting via UART
author Pali Rohár <pali@kernel.org>
Fri, 24 Sep 2021 21:07:05 +0000 (23:07 +0200)
committer Stefan Roese <sr@denx.de>
Fri, 1 Oct 2021 09:07:13 +0000 (11:07 +0200)
Add support for uploading the boot image (the data part only) at a higher
baudrate than the standard one.

The kwboot utility already has the -B option, but choosing a baudrate other
than the standard one (115200 Bd) only works for debug mode, not for
booting the device. The BootROM for kwboot supported platforms (Orion,
Kirkwood, Dove, Discovery, AXP, A37x, A38x, A39x) cannot change the
baudrate when uploading the boot image via the Xmodem protocol, nor can it
be configured via strapping pins.

So instead we add this support by injecting baudrate changing code into
the kwbimage v1 header as a new optional binary extension. This code is
executed by BootROM after it receives the whole header. The code sends
the magic string "$baudratechange\0" just before changing the baudrate
to let kwboot know that it should also change it. This is because the
injected code is run as the last binary extension, and we do not want
to lose possible output from other binary extensions that came before
it (in most cases this is U-Boot SPL).

We also inject the code before the payload (the data part of the image),
to change the baudrate back to the standard value, in case the payload
does not reset UART.

This change improves boot time via UART significantly (depending on the
chosen baudrate), which is very useful when debugging.

Signed-off-by: Pali Rohár <pali@kernel.org>
[ major refactor ]
Signed-off-by: Marek Behún <marek.behun@nic.cz>
Reviewed-by: Stefan Roese <sr@denx.de>
tools/kwboot.c

index 77bf5cb80b61b73815ceaf72a9c09d16ca6ebe06..ba2fd10ff645183851a37926516022762a0a9f94 100644 (file)
@@ -70,6 +70,187 @@ struct kwboot_block {
 #define KWBOOT_BLK_RSP_TIMEO 1000 /* ms */
 #define KWBOOT_HDR_RSP_TIMEO 10000 /* ms */
 
+/*
+ * ARM code making baudrate changing function return to original exec address.
+ *
+ * Layout: one placeholder word (exec_addr) followed by a single instruction
+ * that loads that word into lr. _copy_baudrate_change_code() overwrites the
+ * placeholder with the image's original execaddr, redirects execaddr to the
+ * instruction at offset 4 and copies kwboot_baud_code directly behind it, so
+ * the final "pop { ..., pc }" of kwboot_baud_code continues at the original
+ * exec address.
+ */
+static unsigned char kwboot_pre_baud_code[] = {
+                               /* exec_addr:                                 */
+       0x00, 0x00, 0x00, 0x00, /* .word 0                                    */
+       0x0c, 0xe0, 0x1f, 0xe5, /* ldr lr, exec_addr                          */
+};
+
+/*
+ * ARM code for binary header injection to change baudrate.
+ *
+ * The code sends the "$baudratechange" preamble (including its terminating
+ * NUL) over the UART, waits for the transmitter to drain, derives the UART
+ * base clock from the current divisor latch and old_baudrate, programs the
+ * divisor for new_baudrate, busy-waits for about 1 ms and returns 0.
+ *
+ * The last two words of the array (old_baudrate, new_baudrate) are
+ * placeholders that _copy_baudrate_change_code() fills in.
+ */
+static unsigned char kwboot_baud_code[] = {
+                               /* ; #define UART_BASE 0xd0012000             */
+                               /* ; #define THR       0x00                   */
+                               /* ; #define DLL       0x00                   */
+                               /* ; #define DLH       0x04                   */
+                               /* ; #define LCR       0x0c                   */
+                               /* ; #define   DLAB    0x80                   */
+                               /* ; #define LSR       0x14                   */
+                               /* ; #define   THRE    0x20                   */
+                               /* ; #define   TEMT    0x40                   */
+                               /* ; #define DIV_ROUND(a, b) ((a + b/2) / b)  */
+                               /* ;                                          */
+                               /* ; u32 set_baudrate(u32 old_b, u32 new_b) { */
+                               /* ;   const u8 *str = "$baudratechange";     */
+                               /* ;   u8 c;                                  */
+                               /* ;   do {                                   */
+                               /* ;       c = *str++;                        */
+                               /* ;       writel(UART_BASE + THR, c);        */
+                               /* ;   } while (c);                           */
+                               /* ;   while                                  */
+                               /* ;      (!(readl(UART_BASE + LSR) & TEMT)); */
+                               /* ;   u32 lcr = readl(UART_BASE + LCR);      */
+                               /* ;   writel(UART_BASE + LCR, lcr | DLAB);   */
+                               /* ;   u8 old_dll = readl(UART_BASE + DLL);   */
+                               /* ;   u8 old_dlh = readl(UART_BASE + DLH);   */
+                               /* ;   u16 old_dl = old_dll | (old_dlh << 8); */
+                               /* ;   u32 clk = old_b * old_dl;              */
+                               /* ;   u16 new_dl = DIV_ROUND(clk, new_b);    */
+                               /* ;   u8 new_dll = new_dl & 0xff;            */
+                               /* ;   u8 new_dlh = (new_dl >> 8) & 0xff;     */
+                               /* ;   writel(UART_BASE + DLL, new_dll);      */
+                               /* ;   writel(UART_BASE + DLH, new_dlh);      */
+                               /* ;   writel(UART_BASE + LCR, lcr & ~DLAB);  */
+                               /* ;   msleep(1);                             */
+                               /* ;   return 0;                              */
+                               /* ; }                                        */
+
+       0xfe, 0x5f, 0x2d, 0xe9, /* push  { r1 - r12, lr }                     */
+
+                               /*  ; r0 = UART_BASE                          */
+       0x02, 0x0a, 0xa0, 0xe3, /* mov   r0, #0x2000                          */
+       0x01, 0x00, 0x4d, 0xe3, /* movt  r0, #0xd001                          */
+
+                               /*  ; r2 = address of preamble string         */
+       0xd0, 0x20, 0x8f, 0xe2, /* adr   r2, preamble                         */
+
+                               /*  ; Send preamble string over UART          */
+                               /* .Lloop_preamble:                           */
+                               /*                                            */
+                               /*  ; Wait until Transmitter Holding is Empty */
+                               /* .Lloop_thre:                               */
+                               /*  ; r1 = UART_BASE[LSR] & THRE              */
+       0x14, 0x10, 0x90, 0xe5, /* ldr   r1, [r0, #0x14]                      */
+       0x20, 0x00, 0x11, 0xe3, /* tst   r1, #0x20                            */
+       0xfc, 0xff, 0xff, 0x0a, /* beq   .Lloop_thre                          */
+
+                               /*  ; Put character into Transmitter FIFO     */
+                               /*  ; r1 = *r2++                              */
+       0x01, 0x10, 0xd2, 0xe4, /* ldrb  r1, [r2], #1                         */
+                               /*  ; UART_BASE[THR] = r1                     */
+       0x00, 0x10, 0x80, 0xe5, /* str   r1, [r0, #0x0]                       */
+
+                               /*  ; Loop until end of preamble string       */
+       0x00, 0x00, 0x51, 0xe3, /* cmp   r1, #0                               */
+       0xf8, 0xff, 0xff, 0x1a, /* bne   .Lloop_preamble                      */
+
+                               /*  ; Wait until Transmitter FIFO is Empty    */
+                               /* .Lloop_txempty:                            */
+                               /*  ; r1 = UART_BASE[LSR] & TEMT              */
+       0x14, 0x10, 0x90, 0xe5, /* ldr   r1, [r0, #0x14]                      */
+       0x40, 0x00, 0x11, 0xe3, /* tst   r1, #0x40                            */
+       0xfc, 0xff, 0xff, 0x0a, /* beq   .Lloop_txempty                       */
+
+                               /*  ; Set Divisor Latch Access Bit            */
+                               /*  ; UART_BASE[LCR] |= DLAB                  */
+       0x0c, 0x10, 0x90, 0xe5, /* ldr   r1, [r0, #0x0c]                      */
+       0x80, 0x10, 0x81, 0xe3, /* orr   r1, r1, #0x80                        */
+       0x0c, 0x10, 0x80, 0xe5, /* str   r1, [r0, #0x0c]                      */
+
+                               /*  ; Read current Divisor Latch              */
+                               /*  ; r1 = UART_BASE[DLH]<<8 | UART_BASE[DLL] */
+       0x00, 0x10, 0x90, 0xe5, /* ldr   r1, [r0, #0x00]                      */
+       0xff, 0x10, 0x01, 0xe2, /* and   r1, r1, #0xff                        */
+       0x01, 0x20, 0xa0, 0xe1, /* mov   r2, r1                               */
+       0x04, 0x10, 0x90, 0xe5, /* ldr   r1, [r0, #0x04]                      */
+       0xff, 0x10, 0x01, 0xe2, /* and   r1, r1, #0xff                        */
+                               /*  ; DLH is the high byte: shift it left.    */
+                               /*  ; (asr #8 here would always yield zero.)  */
+       0x01, 0x14, 0xa0, 0xe1, /* lsl   r1, r1, #8                           */
+       0x02, 0x10, 0x81, 0xe1, /* orr   r1, r1, r2                           */
+
+                               /*  ; Read old baudrate value                 */
+                               /*  ; r2 = old_baudrate                       */
+       0x8c, 0x20, 0x9f, 0xe5, /* ldr   r2, old_baudrate                     */
+
+                               /*  ; Calculate base clock                    */
+                               /*  ; r1 = r2 * r1                            */
+       0x92, 0x01, 0x01, 0xe0, /* mul   r1, r2, r1                           */
+
+                               /*  ; Read new baudrate value                 */
+                               /*  ; r2 = new_baudrate                       */
+       0x88, 0x20, 0x9f, 0xe5, /* ldr   r2, new_baudrate                     */
+
+                               /*  ; Calculate new Divisor Latch             */
+                               /*  ; r1 = DIV_ROUND(r1, r2) =                */
+                               /*  ;    = (r1 + r2/2) / r2                   */
+       0xa2, 0x10, 0x81, 0xe0, /* add   r1, r1, r2, lsr #1                   */
+       0x02, 0x40, 0xa0, 0xe1, /* mov   r4, r2                               */
+       0xa1, 0x00, 0x54, 0xe1, /* cmp   r4, r1, lsr #1                       */
+                               /* .Lloop_div1:                               */
+       0x84, 0x40, 0xa0, 0x91, /* movls r4, r4, lsl #1                       */
+       0xa1, 0x00, 0x54, 0xe1, /* cmp   r4, r1, lsr #1                       */
+       0xfc, 0xff, 0xff, 0x9a, /* bls   .Lloop_div1                          */
+       0x00, 0x30, 0xa0, 0xe3, /* mov   r3, #0                               */
+                               /* .Lloop_div2:                               */
+       0x04, 0x00, 0x51, 0xe1, /* cmp   r1, r4                               */
+       0x04, 0x10, 0x41, 0x20, /* subhs r1, r1, r4                           */
+       0x03, 0x30, 0xa3, 0xe0, /* adc   r3, r3, r3                           */
+       0xa4, 0x40, 0xa0, 0xe1, /* mov   r4, r4, lsr #1                       */
+       0x02, 0x00, 0x54, 0xe1, /* cmp   r4, r2                               */
+       0xf9, 0xff, 0xff, 0x2a, /* bhs   .Lloop_div2                          */
+       0x03, 0x10, 0xa0, 0xe1, /* mov   r1, r3                               */
+
+                               /*  ; Set new Divisor Latch Low               */
+                               /*  ; UART_BASE[DLL] = r1 & 0xff              */
+       0x01, 0x20, 0xa0, 0xe1, /* mov   r2, r1                               */
+       0xff, 0x20, 0x02, 0xe2, /* and   r2, r2, #0xff                        */
+       0x00, 0x20, 0x80, 0xe5, /* str   r2, [r0, #0x00]                      */
+
+                               /*  ; Set new Divisor Latch High              */
+                               /*  ; UART_BASE[DLH] = r1>>8 & 0xff           */
+       0x41, 0x24, 0xa0, 0xe1, /* asr   r2, r1, #8                           */
+       0xff, 0x20, 0x02, 0xe2, /* and   r2, r2, #0xff                        */
+       0x04, 0x20, 0x80, 0xe5, /* str   r2, [r0, #0x04]                      */
+
+                               /*  ; Clear Divisor Latch Access Bit          */
+                               /*  ; UART_BASE[LCR] &= ~DLAB                 */
+       0x0c, 0x10, 0x90, 0xe5, /* ldr   r1, [r0, #0x0c]                      */
+       0x80, 0x10, 0xc1, 0xe3, /* bic   r1, r1, #0x80                        */
+       0x0c, 0x10, 0x80, 0xe5, /* str   r1, [r0, #0x0c]                      */
+
+                               /*  ; Sleep 1ms ~~ 600000 cycles at 1200 MHz  */
+                               /*  ; r1 = 600000                             */
+       0x9f, 0x1d, 0xa0, 0xe3, /* mov   r1, #0x27c0                          */
+       0x09, 0x10, 0x40, 0xe3, /* movt  r1, #0x0009                          */
+                               /* .Lloop_sleep:                              */
+       0x01, 0x10, 0x41, 0xe2, /* sub   r1, r1, #1                           */
+       0x00, 0x00, 0x51, 0xe3, /* cmp   r1, #0                               */
+       0xfc, 0xff, 0xff, 0x1a, /* bne   .Lloop_sleep                         */
+
+                               /*  ; Return 0 - no error                     */
+       0x00, 0x00, 0xa0, 0xe3, /* mov   r0, #0                               */
+       0xfe, 0x9f, 0xbd, 0xe8, /* pop   { r1 - r12, pc }                     */
+
+                               /*  ; Preamble string                         */
+                               /* preamble:                                  */
+       0x24, 0x62, 0x61, 0x75, /* .asciz "$baudratechange"                   */
+       0x64, 0x72, 0x61, 0x74,
+       0x65, 0x63, 0x68, 0x61,
+       0x6e, 0x67, 0x65, 0x00,
+
+                               /*  ; Placeholder for old baudrate value      */
+                               /* old_baudrate:                              */
+       0x00, 0x00, 0x00, 0x00, /* .word 0                                    */
+
+                               /*  ; Placeholder for new baudrate value      */
+                               /* new_baudrate:                              */
+       0x00, 0x00, 0x00, 0x00, /* .word 0                                    */
+};
+
+/*
+ * Size of the optional binary header that carries kwboot_baud_code: the code
+ * itself, the opt_hdr_v1 structure and 8 additional bytes. This matches the
+ * "binsz + 8 + sizeof(*ohdr)" computation in kwboot_add_bin_ohdr_v1().
+ */
+#define KWBOOT_BAUDRATE_BIN_HEADER_SZ (sizeof(kwboot_baud_code) + \
+                                      sizeof(struct opt_hdr_v1) + 8)
+
+/*
+ * Magic sequence sent by the injected code right before it changes the
+ * baudrate. All 16 bytes are significant: the 15 characters plus the
+ * terminating NUL, which the injected code transmits as well.
+ */
+static const char kwb_baud_magic[16] = "$baudratechange";
+
 static int kwboot_verbose;
 
 static int msg_req_delay = KWBOOT_MSG_REQ_DELAY;
@@ -233,26 +414,184 @@ kwboot_tty_send_char(int fd, unsigned char c)
 }
 
+/*
+ * Translate a numeric baudrate (in Bd) into the matching termios speed_t
+ * constant. Each case is compiled in only when the corresponding Bxxx macro
+ * is defined. Returns B0 when the baudrate has no matching constant.
+ */
 static speed_t
-kwboot_tty_speed(int baudrate)
+kwboot_tty_baudrate_to_speed(int baudrate)
 {
        switch (baudrate) {
+#ifdef B4000000
+       case 4000000:
+               return B4000000;
+#endif
+#ifdef B3500000
+       case 3500000:
+               return B3500000;
+#endif
+#ifdef B3000000
+       case 3000000:
+               return B3000000;
+#endif
+#ifdef B2500000
+       case 2500000:
+               return B2500000;
+#endif
+#ifdef B2000000
+       case 2000000:
+               return B2000000;
+#endif
+#ifdef B1500000
+       case 1500000:
+               return B1500000;
+#endif
+#ifdef B1152000
+       case 1152000:
+               return B1152000;
+#endif
+#ifdef B1000000
+       case 1000000:
+               return B1000000;
+#endif
+#ifdef B921600
+       case 921600:
+               return B921600;
+#endif
+#ifdef B614400
+       case 614400:
+               return B614400;
+#endif
+#ifdef B576000
+       case 576000:
+               return B576000;
+#endif
+#ifdef B500000
+       case 500000:
+               return B500000;
+#endif
+#ifdef B460800
+       case 460800:
+               return B460800;
+#endif
+#ifdef B307200
+       case 307200:
+               return B307200;
+#endif
+#ifdef B230400
+       case 230400:
+               return B230400;
+#endif
+#ifdef B153600
+       case 153600:
+               return B153600;
+#endif
+#ifdef B115200
        case 115200:
                return B115200;
+#endif
+#ifdef B76800
+       case 76800:
+               return B76800;
+#endif
+#ifdef B57600
        case 57600:
                return B57600;
+#endif
+#ifdef B38400
        case 38400:
                return B38400;
+#endif
+#ifdef B19200
        case 19200:
                return B19200;
+#endif
+#ifdef B9600
        case 9600:
                return B9600;
+#endif
+#ifdef B4800
+       case 4800:
+               return B4800;
+#endif
+#ifdef B2400
+       case 2400:
+               return B2400;
+#endif
+#ifdef B1800
+       case 1800:
+               return B1800;
+#endif
+#ifdef B1200
+       case 1200:
+               return B1200;
+#endif
+#ifdef B600
+       case 600:
+               return B600;
+#endif
+#ifdef B300
+       case 300:
+               return B300;
+#endif
+#ifdef B200
+       case 200:
+               return B200;
+#endif
+#ifdef B150
+       case 150:
+               return B150;
+#endif
+#ifdef B134
+       case 134:
+               return B134;
+#endif
+#ifdef B110
+       case 110:
+               return B110;
+#endif
+#ifdef B75
+       case 75:
+               return B75;
+#endif
+#ifdef B50
+       case 50:
+               return B50;
+#endif
+       default:
+               /* B0 doubles as the "unsupported baudrate" marker for callers */
+               return B0;
+       }
+}
+
+/*
+ * Switch both the input and the output speed of the tty behind fd to the
+ * given baudrate, applying the change immediately (TCSANOW).
+ *
+ * Returns 0 on success. Returns -1 with errno set to EINVAL when the
+ * baudrate has no termios speed constant; otherwise returns the non-zero
+ * result of the termios call that failed.
+ */
+static int
+kwboot_tty_change_baudrate(int fd, int baudrate)
+{
+       struct termios tio;
+       speed_t speed;
+       int rc;
+
+       /* start from the current settings so only the speed is modified */
+       rc = tcgetattr(fd, &tio);
+       if (rc)
+               return rc;
+
+       speed = kwboot_tty_baudrate_to_speed(baudrate);
+       if (speed == B0) {
+               errno = EINVAL;
+               return -1;
        }
 
-       return -1;
+       rc = cfsetospeed(&tio, speed);
+       if (rc)
+               return rc;
+
+       rc = cfsetispeed(&tio, speed);
+       if (rc)
+               return rc;
+
+       rc = tcsetattr(fd, TCSANOW, &tio);
+       if (rc)
+               return rc;
+
+       return 0;
 }
 
 static int
-kwboot_open_tty(const char *path, speed_t speed)
+kwboot_open_tty(const char *path, int baudrate)
 {
        int rc, fd;
        struct termios tio;
@@ -271,13 +610,14 @@ kwboot_open_tty(const char *path, speed_t speed)
        tio.c_cc[VMIN] = 1;
        tio.c_cc[VTIME] = 10;
 
-       cfsetospeed(&tio, speed);
-       cfsetispeed(&tio, speed);
-
        rc = tcsetattr(fd, TCSANOW, &tio);
        if (rc)
                goto out;
 
+       rc = kwboot_tty_change_baudrate(fd, baudrate);
+       if (rc)
+               goto out;
+
        rc = fd;
 out:
        if (rc < 0) {
@@ -426,7 +766,34 @@ _xm_reply_to_error(int c)
 }
 
+/*
+ * Feed one received non-xmodem byte into the matcher for the baudrate change
+ * magic sequence (kwb_baud_magic, including its terminating NUL).
+ *
+ * Bytes that match the next expected magic byte are held back and not
+ * printed. On a mismatch the held-back prefix is printed, and the offending
+ * byte is either printed too or, if it is itself the first magic byte, kept
+ * as the start of a new match (so "$$baudratechange" is still recognized).
+ *
+ * The match position is kept in a static variable and therefore persists
+ * across calls.
+ *
+ * Returns 0 while the magic sequence is not complete, 1 when it has been
+ * received and the tty baudrate was changed, or the non-zero result of
+ * kwboot_tty_change_baudrate() if changing the baudrate failed.
+ */
 static int
-kwboot_xm_recv_reply(int fd, char *c, int allow_non_xm, int *non_xm_print)
+kwboot_baud_magic_handle(int fd, char c, int baudrate)
+{
+       static size_t rcv_len;
+
+       if (rcv_len < sizeof(kwb_baud_magic)) {
+               /* try to recognize whole magic word */
+               if (c == kwb_baud_magic[rcv_len]) {
+                       rcv_len++;
+               } else {
+                       /* not the magic word: release what was held back */
+                       printf("%.*s", (int)rcv_len, kwb_baud_magic);
+
+                       if (c == kwb_baud_magic[0]) {
+                               /* this byte may start a new magic word */
+                               rcv_len = 1;
+                       } else {
+                               putchar(c);
+                               rcv_len = 0;
+                       }
+
+                       fflush(stdout);
+               }
+       }
+
+       if (rcv_len == sizeof(kwb_baud_magic)) {
+               /* magic word received */
+               kwboot_printv("\nChanging baudrate to %d Bd\n", baudrate);
+
+               return kwboot_tty_change_baudrate(fd, baudrate) ? : 1;
+       } else {
+               return 0;
+       }
+}
+
+static int
+kwboot_xm_recv_reply(int fd, char *c, int allow_non_xm, int *non_xm_print,
+                    int baudrate, int *baud_changed)
 {
        int timeout = allow_non_xm ? KWBOOT_HDR_RSP_TIMEO : blk_rsp_timeo;
        uint64_t recv_until = _now() + timeout;
@@ -434,6 +801,8 @@ kwboot_xm_recv_reply(int fd, char *c, int allow_non_xm, int *non_xm_print)
 
        if (non_xm_print)
                *non_xm_print = 0;
+       if (baud_changed)
+               *baud_changed = 0;
 
        while (1) {
                rc = kwboot_tty_recv(fd, c, 1, timeout);
@@ -451,15 +820,30 @@ kwboot_xm_recv_reply(int fd, char *c, int allow_non_xm, int *non_xm_print)
                        break;
 
                /*
-                * If printing non-xmodem text output is allowed and such a byte
-                * was received, print it and increase receiving time.
+                * If receiving/printing non-xmodem text output is allowed and
+                * such a byte was received, we want to increase receiving time
+                * and either:
+                * - print the byte, if it is not part of baudrate change magic
+                *   sequence while baudrate change was requested (-B option)
+                * - change baudrate
                 * Otherwise decrease timeout by time elapsed.
                 */
                if (allow_non_xm) {
                        recv_until = _now() + timeout;
-                       putchar(*c);
-                       fflush(stdout);
-                       *non_xm_print = 1;
+
+                       if (baudrate && !*baud_changed) {
+                               rc = kwboot_baud_magic_handle(fd, *c, baudrate);
+                               if (rc == 1)
+                                       *baud_changed = 1;
+                               else if (!rc)
+                                       *non_xm_print = 1;
+                               else
+                                       return rc;
+                       } else if (!baudrate || !*baud_changed) {
+                               putchar(*c);
+                               fflush(stdout);
+                               *non_xm_print = 1;
+                       }
                } else {
                        timeout = recv_until - _now();
                        if (timeout < 0) {
@@ -474,10 +858,10 @@ kwboot_xm_recv_reply(int fd, char *c, int allow_non_xm, int *non_xm_print)
 
 static int
 kwboot_xm_sendblock(int fd, struct kwboot_block *block, int allow_non_xm,
-                   int *done_print)
+                   int *done_print, int baudrate)
 {
-       int non_xm_print;
-       int rc, retries;
+       int non_xm_print, baud_changed;
+       int rc, err, retries;
        char c;
 
        *done_print = 0;
@@ -494,9 +878,10 @@ kwboot_xm_sendblock(int fd, struct kwboot_block *block, int allow_non_xm,
                        *done_print = 1;
                }
 
-               rc = kwboot_xm_recv_reply(fd, &c, allow_non_xm, &non_xm_print);
+               rc = kwboot_xm_recv_reply(fd, &c, allow_non_xm, &non_xm_print,
+                                         baudrate, &baud_changed);
                if (rc)
-                       return rc;
+                       goto can;
 
                if (!allow_non_xm && c != ACK)
                        kwboot_progress(-1, '+');
@@ -505,7 +890,20 @@ kwboot_xm_sendblock(int fd, struct kwboot_block *block, int allow_non_xm,
        if (non_xm_print)
                kwboot_printv("\n");
 
+       if (allow_non_xm && baudrate && !baud_changed) {
+               fprintf(stderr, "Baudrate was not changed\n");
+               rc = -1;
+               errno = EPROTO;
+               goto can;
+       }
+
        return _xm_reply_to_error(c);
+can:
+       err = errno;
+       kwboot_tty_send_char(fd, CAN);
+       kwboot_printv("\n");
+       errno = err;
+       return rc;
 }
 
 static int
@@ -522,7 +920,7 @@ kwboot_xm_finish(int fd)
                if (rc)
                        return rc;
 
-               rc = kwboot_xm_recv_reply(fd, &c, 0, NULL);
+               rc = kwboot_xm_recv_reply(fd, &c, 0, NULL, 0, NULL);
                if (rc)
                        return rc;
        } while (c == NAK && retries-- > 0);
@@ -532,7 +930,7 @@ kwboot_xm_finish(int fd)
 
 static int
 kwboot_xmodem_one(int tty, int *pnum, int header, const uint8_t *data,
-                 size_t size)
+                 size_t size, int baudrate)
 {
        int done_print = 0;
        size_t sent, left;
@@ -555,7 +953,7 @@ kwboot_xmodem_one(int tty, int *pnum, int header, const uint8_t *data,
                last_block = (left <= blksz);
 
                rc = kwboot_xm_sendblock(tty, &block, header && last_block,
-                                        &done_print);
+                                        &done_print, baudrate);
                if (rc)
                        goto out;
 
@@ -576,7 +974,7 @@ out:
 }
 
 static int
-kwboot_xmodem(int tty, const void *_img, size_t size)
+kwboot_xmodem(int tty, const void *_img, size_t size, int baudrate)
 {
        const uint8_t *img = _img;
        int rc, pnum;
@@ -590,18 +988,41 @@ kwboot_xmodem(int tty, const void *_img, size_t size)
 
        pnum = 1;
 
-       rc = kwboot_xmodem_one(tty, &pnum, 1, img, hdrsz);
+       rc = kwboot_xmodem_one(tty, &pnum, 1, img, hdrsz, baudrate);
        if (rc)
                return rc;
 
        img += hdrsz;
        size -= hdrsz;
 
-       rc = kwboot_xmodem_one(tty, &pnum, 0, img, size);
+       rc = kwboot_xmodem_one(tty, &pnum, 0, img, size, 0);
+       if (rc)
+               return rc;
+
+       rc = kwboot_xm_finish(tty);
        if (rc)
                return rc;
 
-       return kwboot_xm_finish(tty);
+       if (baudrate) {
+               char buf[sizeof(kwb_baud_magic)];
+
+               /* Wait 1s for baudrate change magic */
+               rc = kwboot_tty_recv(tty, buf, sizeof(buf), 1000);
+               if (rc)
+                       return rc;
+
+               if (memcmp(buf, kwb_baud_magic, sizeof(buf))) {
+                       errno = EPROTO;
+                       return -1;
+               }
+
+               kwboot_printv("\nChanging baudrate back to 115200 Bd\n\n");
+               rc = kwboot_tty_change_baudrate(tty, 115200);
+               if (rc)
+                       return rc;
+       }
+
+       return 0;
 }
 
 static int
@@ -782,6 +1203,37 @@ kwboot_img_is_secure(void *img)
        return 0;
 }
 
+/*
+ * Make room for "grow" extra bytes directly in front of the image data.
+ *
+ * If the gap between the end of the header and the start of the data is
+ * smaller than "grow", the data is first shifted towards the end of the
+ * buffer and *size is enlarged accordingly. The header's srcaddr and
+ * destaddr are then lowered by "grow" and blocksize is raised by "grow".
+ *
+ * Returns a pointer to the new start of the data, i.e. to the freshly
+ * reserved bytes.
+ */
+static void *
+kwboot_img_grow_data_left(void *img, size_t *size, size_t grow)
+{
+       struct main_hdr_v1 *hdr = img;
+       uint32_t data_off = le32_to_cpu(hdr->srcaddr);
+       uint32_t hdr_len = kwbheader_size(hdr);
+       uint32_t data_len = *size - data_off;
+       uint8_t *base = img;
+
+       /* shift the data only when the existing gap is too small */
+       if (hdr_len + grow > data_off) {
+               size_t shift = hdr_len + grow - data_off;
+
+               memmove(base + data_off + shift, base + data_off, data_len);
+               data_off += shift;
+               *size += shift;
+       }
+
+       /* the data part now begins "grow" bytes earlier */
+       data_off -= grow;
+
+       hdr->srcaddr = cpu_to_le32(data_off);
+       hdr->destaddr = cpu_to_le32(le32_to_cpu(hdr->destaddr) - grow);
+       hdr->blocksize = cpu_to_le32(le32_to_cpu(hdr->blocksize) + grow);
+
+       return base + data_off;
+}
+
 static void
 kwboot_img_grow_hdr(void *img, size_t *size, size_t grow)
 {
@@ -813,8 +1265,71 @@ kwboot_img_grow_hdr(void *img, size_t *size, size_t grow)
        }
 }
 
+/*
+ * Append a new, zero-filled optional binary header to a v1 image.
+ *
+ * The image header is grown by binsz + 8 + sizeof(struct opt_hdr_v1) bytes.
+ * The new optional header is linked in after the last existing optional
+ * header, or directly after the main header if there is none yet.
+ *
+ * Returns a pointer to offset 4 of the new header's data area.
+ * NOTE(review): presumably this is where the binary code of size binsz is
+ * expected to be copied, with 4 bytes of the extra 8 in front of it and 4
+ * behind it - confirm against the kwbimage v1 binary header layout.
+ */
+static void *
+kwboot_add_bin_ohdr_v1(void *img, size_t *size, uint32_t binsz)
+{
+       struct main_hdr_v1 *hdr = img;
+       struct opt_hdr_v1 *ohdr;
+       uint32_t ohdrsz;
+
+       ohdrsz = binsz + 8 + sizeof(*ohdr);
+       kwboot_img_grow_hdr(img, size, ohdrsz);
+
+       if (hdr->ext & 0x1) {
+               /* stop on the last existing optional header */
+               for_each_opt_hdr_v1 (ohdr, img)
+                       if (opt_hdr_v1_next(ohdr) == NULL)
+                               break;
+
+               /* flag that another header follows, then step onto it */
+               *opt_hdr_v1_ext(ohdr) |= 1;
+               ohdr = opt_hdr_v1_next(ohdr);
+       } else {
+               /* first optional header: it starts right after the main one */
+               hdr->ext |= 1;
+               ohdr = (void *)(hdr + 1);
+       }
+
+       ohdr->headertype = OPT_HDR_V1_BINARY_TYPE;
+       ohdr->headersz_msb = ohdrsz >> 16;
+       ohdr->headersz_lsb = cpu_to_le16(ohdrsz & 0xffff);
+
+       /* clear everything behind the opt_hdr_v1 structure itself */
+       memset(&ohdr->data[0], 0, ohdrsz - sizeof(*ohdr));
+
+       return &ohdr->data[4];
+}
+
+/*
+ * Copy the baudrate changing code to dst and fill in its placeholders.
+ *
+ * hdr:      main header of the image; its execaddr is modified when pre is set
+ * dst:      destination buffer; needs room for kwboot_baud_code, plus
+ *           kwboot_pre_baud_code when pre is set
+ * pre:      non-zero to prepend kwboot_pre_baud_code and hook the image's
+ *           exec address
+ * old_baud: baudrate in effect when the code runs
+ * new_baud: baudrate the code switches to
+ *
+ * All words are written with memcpy() because dst is a byte pointer with no
+ * alignment guarantee.
+ */
+static void
+_copy_baudrate_change_code(struct main_hdr_v1 *hdr, void *dst, int pre,
+                          int old_baud, int new_baud)
+{
+       size_t codesz = sizeof(kwboot_baud_code);
+       uint8_t *code = dst;
+       uint32_t word;
+
+       if (pre) {
+               size_t presz = sizeof(kwboot_pre_baud_code);
+
+               /*
+                * We need to prepend code that loads lr register with original
+                * value of hdr->execaddr. We do this by putting the original
+                * exec address before the code that loads it relatively from
+                * its beginning.
+                * Afterwards we change the exec address to this code (which is
+                * at offset 4, because the first 4 bytes contain the original
+                * exec address).
+                */
+               memcpy(code, kwboot_pre_baud_code, presz);
+
+               /* execaddr is copied as stored in the header, no conversion */
+               word = hdr->execaddr;
+               memcpy(code, &word, sizeof(word));
+
+               hdr->execaddr = cpu_to_le32(le32_to_cpu(hdr->destaddr) + 4);
+
+               code += presz;
+       }
+
+       /* the last 8 bytes are the old/new baudrate placeholders */
+       memcpy(code, kwboot_baud_code, codesz - 8);
+
+       word = cpu_to_le32(old_baud);
+       memcpy(code + codesz - 8, &word, sizeof(word));
+
+       word = cpu_to_le32(new_baud);
+       memcpy(code + codesz - 4, &word, sizeof(word));
+}
+
 static int
-kwboot_img_patch_hdr(void *img, size_t *size)
+kwboot_img_patch(void *img, size_t *size, int baudrate)
 {
        int rc;
        struct main_hdr_v1 *hdr;
@@ -908,6 +1423,51 @@ kwboot_img_patch_hdr(void *img, size_t *size)
                hdr->blockid = IBR_HDR_UART_ID;
        }
 
+       if (baudrate) {
+               uint32_t codesz = sizeof(kwboot_baud_code);
+               void *code;
+
+               if (image_ver == 0) {
+                       fprintf(stderr,
+                               "Cannot inject code for changing baudrate into v0 image header\n");
+                       errno = EINVAL;
+                       goto out;
+               }
+
+               if (is_secure) {
+                       fprintf(stderr,
+                               "Cannot inject code for changing baudrate into image with secure header\n");
+                       errno = EINVAL;
+                       goto out;
+               }
+
+               /*
+                * First inject code that changes the baudrate from the default
+                * value of 115200 Bd to requested value. This code is inserted
+                * as a new opt hdr, so it is executed by BootROM after the
+                * header part is received.
+                */
+               kwboot_printv("Injecting binary header code for changing baudrate to %d Bd\n",
+                             baudrate);
+
+               code = kwboot_add_bin_ohdr_v1(img, size, codesz);
+               _copy_baudrate_change_code(hdr, code, 0, 115200, baudrate);
+
+               /*
+                * Now inject code that changes the baudrate back to 115200 Bd.
+                * This code is prepended to the data part of the image, so it
+                * is executed before U-Boot proper.
+                */
+               kwboot_printv("Injecting code for changing baudrate back\n");
+
+               codesz += sizeof(kwboot_pre_baud_code);
+               code = kwboot_img_grow_data_left(img, size, codesz);
+               _copy_baudrate_change_code(hdr, code, 1, baudrate, 115200);
+
+               /* recompute header size */
+               hdrsz = kwbheader_size(hdr);
+       }
+
        if (hdrsz % KWBOOT_XM_BLKSZ) {
                size_t offset = (KWBOOT_XM_BLKSZ - hdrsz % KWBOOT_XM_BLKSZ) %
                                KWBOOT_XM_BLKSZ;
@@ -964,7 +1524,8 @@ main(int argc, char **argv)
        void *debugmsg;
        void *img;
        size_t size;
-       speed_t speed;
+       size_t after_img_rsv;
+       int baudrate;
 
        rv = 1;
        tty = -1;
@@ -974,7 +1535,8 @@ main(int argc, char **argv)
        img = NULL;
        term = 0;
        size = 0;
-       speed = B115200;
+       after_img_rsv = KWBOOT_XM_BLKSZ;
+       baudrate = 115200;
 
        kwboot_verbose = isatty(STDOUT_FILENO);
 
@@ -1024,9 +1586,7 @@ main(int argc, char **argv)
                        break;
 
                case 'B':
-                       speed = kwboot_tty_speed(atoi(optarg));
-                       if (speed == -1)
-                               goto usage;
+                       baudrate = atoi(optarg);
                        break;
 
                case 'h':
@@ -1044,20 +1604,29 @@ main(int argc, char **argv)
 
        ttypath = argv[optind++];
 
-       tty = kwboot_open_tty(ttypath, speed);
+       tty = kwboot_open_tty(ttypath, imgpath ? 115200 : baudrate);
        if (tty < 0) {
                perror(ttypath);
                goto out;
        }
 
+       if (baudrate == 115200)
+               /* do not change baudrate during Xmodem to the same value */
+               baudrate = 0;
+       else
+               /* ensure we have enough space for baudrate change code */
+               after_img_rsv += KWBOOT_BAUDRATE_BIN_HEADER_SZ +
+                                sizeof(kwboot_pre_baud_code) +
+                                sizeof(kwboot_baud_code);
+
        if (imgpath) {
-               img = kwboot_read_image(imgpath, &size, KWBOOT_XM_BLKSZ);
+               img = kwboot_read_image(imgpath, &size, after_img_rsv);
                if (!img) {
                        perror(imgpath);
                        goto out;
                }
 
-               rc = kwboot_img_patch_hdr(img, &size);
+               rc = kwboot_img_patch(img, &size, baudrate);
                if (rc) {
                        fprintf(stderr, "%s: Invalid image.\n", imgpath);
                        goto out;
@@ -1079,7 +1648,7 @@ main(int argc, char **argv)
        }
 
        if (img) {
-               rc = kwboot_xmodem(tty, img, size);
+               rc = kwboot_xmodem(tty, img, size, baudrate);
                if (rc) {
                        perror("xmodem");
                        goto out;