autobahn check utf8 on TEXT and CLOSE
authorAndy Green <andy.green@linaro.org>
Tue, 29 Dec 2015 01:46:03 +0000 (09:46 +0800)
committerAndy Green <andy.green@linaro.org>
Tue, 29 Dec 2015 01:46:03 +0000 (09:46 +0800)
Signed-off-by: Andy Green <andy.green@linaro.org>
lib/client-parser.c
lib/private-libwebsockets.h

index 90fd305..6d511ea 100644 (file)
@@ -41,6 +41,7 @@ int lws_client_rx_sm(struct lws *wsi, unsigned char c)
                        case LWSWSOPC_TEXT_FRAME:
                        case LWSWSOPC_BINARY_FRAME:
                                wsi->u.ws.continuation_possible = 1;
+                               wsi->u.ws.utf8 = 0;
                                break;
                        case LWSWSOPC_CONTINUATION:
                                if (!wsi->u.ws.continuation_possible) {
@@ -48,6 +49,9 @@ int lws_client_rx_sm(struct lws *wsi, unsigned char c)
                                        return -1;
                                }
                                break;
+                       case LWSWSOPC_CLOSE:
+                               wsi->u.ws.utf8 = 0;
+                               break;
                        case 3:
                        case 4:
                        case 5:
@@ -274,15 +278,73 @@ int lws_client_rx_sm(struct lws *wsi, unsigned char c)
                        return 1;
                }
 
-               if ((!wsi->u.ws.this_frame_masked) || wsi->u.ws.all_zero_nonce)
-                       wsi->u.ws.rx_user_buffer[LWS_SEND_BUFFER_PRE_PADDING +
-                              (wsi->u.ws.rx_user_buffer_head++)] = c;
-               else
-                       wsi->u.ws.rx_user_buffer[LWS_SEND_BUFFER_PRE_PADDING +
-                              (wsi->u.ws.rx_user_buffer_head++)] =
-                       c ^ wsi->u.ws.mask_nonce[
+               if (wsi->u.ws.this_frame_masked && !wsi->u.ws.all_zero_nonce)
+                       c ^= wsi->u.ws.mask_nonce[
                                            (wsi->u.ws.frame_mask_index++) & 3];
 
+               if ((!wsi->u.ws.frame_is_binary &&
+                    wsi->u.ws.opcode == LWSWSOPC_CONTINUATION) ||
+                    wsi->u.ws.opcode == LWSWSOPC_TEXT_FRAME ||
+                   (wsi->u.ws.opcode == LWSWSOPC_CLOSE &&
+                    wsi->u.ws.rx_user_buffer_head > 2)) {
+                       static const unsigned char e0f4[] = {
+                               0xa0 | ((2 - 1) << 2) | 1, /* e0 */
+                               0x80 | ((4 - 1) << 2) | 1, /* e1 */
+                               0x80 | ((4 - 1) << 2) | 1, /* e2 */
+                               0x80 | ((4 - 1) << 2) | 1, /* e3 */
+                               0x80 | ((4 - 1) << 2) | 1, /* e4 */
+                               0x80 | ((4 - 1) << 2) | 1, /* e5 */
+                               0x80 | ((4 - 1) << 2) | 1, /* e6 */
+                               0x80 | ((4 - 1) << 2) | 1, /* e7 */
+                               0x80 | ((4 - 1) << 2) | 1, /* e8 */
+                               0x80 | ((4 - 1) << 2) | 1, /* e9 */
+                               0x80 | ((4 - 1) << 2) | 1, /* ea */
+                               0x80 | ((4 - 1) << 2) | 1, /* eb */
+                               0x80 | ((4 - 1) << 2) | 1, /* ec */
+                               0x80 | ((2 - 1) << 2) | 1, /* ed */
+                               0x80 | ((4 - 1) << 2) | 1, /* ee */
+                               0x80 | ((4 - 1) << 2) | 1, /* ef */
+                               0x90 | ((3 - 1) << 2) | 2, /* f0 */
+                               0x80 | ((4 - 1) << 2) | 2, /* f1 */
+                               0x80 | ((4 - 1) << 2) | 2, /* f2 */
+                               0x80 | ((4 - 1) << 2) | 2, /* f3 */
+                               0x80 | ((1 - 1) << 2) | 2, /* f4 */
+
+                               0,                         /* s0 */
+                               0x80 | ((4 - 1) << 2) | 0, /* s2 */
+                               0x80 | ((4 - 1) << 2) | 1, /* s3 */
+                       };
+
+                       if (!wsi->u.ws.utf8) {
+                               if (c >= 0x80) {
+                                       if (c < 0xc2 || c > 0xf4)
+                                               goto utf8_fail;
+                                       if (c < 0xe0)
+                                               wsi->u.ws.utf8 = 0x80 |
+                                                                ((4 - 1) << 2);
+                                       else
+                                               wsi->u.ws.utf8 = e0f4[c - 0xe0];
+                               }
+                       } else {
+                               if (c < (wsi->u.ws.utf8 & 0xf0) ||
+                                   c >= (wsi->u.ws.utf8 & 0xf0) + 0x10 +
+                                        ((wsi->u.ws.utf8 << 2) & 0x30))
+                                       goto utf8_fail;
+                               wsi->u.ws.utf8 = e0f4[21 + (wsi->u.ws.utf8 & 3)];
+                       }
+
+                       /* we are ending partway through utf-8 character? */
+                       if (wsi->u.ws.final &&
+                           wsi->u.ws.rx_packet_length == 1 &&
+                           wsi->u.ws.utf8) {
+utf8_fail:                     lwsl_info("utf8 error\n");
+                               return -1;
+                       }
+               }
+
+               wsi->u.ws.rx_user_buffer[LWS_SEND_BUFFER_PRE_PADDING +
+                              (wsi->u.ws.rx_user_buffer_head++)] = c;
+
                if (--wsi->u.ws.rx_packet_length == 0) {
                        /* spill because we have the whole frame */
                        wsi->lws_rx_parse_state = LWS_RXPS_NEW;
index 06dd64c..d6b5175 100644 (file)
@@ -847,6 +847,7 @@ struct _lws_websocket_related {
        unsigned char rsv;
        /* zero if no info, or length including 2-byte close code */
        unsigned char close_in_ping_buffer_len;
+       unsigned char utf8;
 
        unsigned int final:1;
        unsigned int frame_is_binary:1;