Fix ord() when dealing with utf8 chars (#19423)

author David Riazati <davidriazati@fb.com>

Fri, 19 Apr 2019 17:20:43 +0000 (10:20 -0700)

committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>

Fri, 19 Apr 2019 17:27:04 +0000 (10:27 -0700)
author David Riazati <davidriazati@fb.com>
Fri, 19 Apr 2019 17:20:43 +0000 (10:20 -0700)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Fri, 19 Apr 2019 17:27:04 +0000 (10:27 -0700)
diff --git a/test/test_jit.py b/test/test_jit.py
index f20cfda..a995baa 100644
--- a/test/test_jit.py
+++ b/test/test_jit.py
@@ -11022,6 +11022,13 @@ a")
          self.checkScript(fn, ("h"))
          self.checkScript(fn, ("y"))
  
+        def index_str_to_tensor(s):
+            # type: (str) -> int
+            return torch.tensor(ord(s))
+
+        s = u'\u00a3'.encode('utf8')[:1]
+        self.checkScript(index_str_to_tensor, (s,))
+
      def test_string_slicing(self):
          def fn1(x):
              # type: (str) -> str
diff --git a/torch/csrc/jit/register_prim_ops.cpp b/torch/csrc/jit/register_prim_ops.cpp
index a579556..d36934d 100644
--- a/torch/csrc/jit/register_prim_ops.cpp
+++ b/torch/csrc/jit/register_prim_ops.cpp
@@ -1825,7 +1825,8 @@ RegisterOperators reg2({
                string.size() == 1,
                "String for ord() must be 1 character, found",
                string.size());
-          push(stack, int64_t(string.at(0)));
+          uint8_t ord = string.at(0);
+          push(stack, int64_t(ord));
            return 0;
          }),
  #define CREATE_COPY_OP(other_type, c_type)                                 \
author	David Riazati <davidriazati@fb.com>
	Fri, 19 Apr 2019 17:20:43 +0000 (10:20 -0700)
committer	Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
	Fri, 19 Apr 2019 17:27:04 +0000 (10:27 -0700)
test/test_jit.py		patch | blob | history
torch/csrc/jit/register_prim_ops.cpp		patch | blob | history