Fix ord() when dealing with utf8 chars (#19423)
author: David Riazati <davidriazati@fb.com>
Fri, 19 Apr 2019 17:20:43 +0000 (10:20 -0700)
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Fri, 19 Apr 2019 17:27:04 +0000 (10:27 -0700)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/19423
ghimport-source-id: e7449489fbc86ec1116f94027b3c1561942413ee

Reviewed By: eellison

Differential Revision: D15002847

Pulled By: driazati

fbshipit-source-id: 4560cebcfca695447423d48d65ed364e7dbdbedb

test/test_jit.py
torch/csrc/jit/register_prim_ops.cpp

index f20cfda..a995baa 100644 (file)
@@ -11022,6 +11022,13 @@ a")
         self.checkScript(fn, ("h"))
         self.checkScript(fn, ("y"))
 
+        def index_str_to_tensor(s):
+            # type: (str) -> int
+            return torch.tensor(ord(s))
+
+        s = u'\u00a3'.encode('utf8')[:1]
+        self.checkScript(index_str_to_tensor, (s,))
+
     def test_string_slicing(self):
         def fn1(x):
             # type: (str) -> str
index a579556..d36934d 100644 (file)
@@ -1825,7 +1825,8 @@ RegisterOperators reg2({
               string.size() == 1,
               "String for ord() must be 1 character, found",
               string.size());
-          push(stack, int64_t(string.at(0)));
+          uint8_t ord = string.at(0);
+          push(stack, int64_t(ord));
           return 0;
         }),
 #define CREATE_COPY_OP(other_type, c_type)                                 \