From e7b9526dc6f23c89001b407564ff3b8160aedbbf Mon Sep 17 00:00:00 2001
From: David Riazati
Date: Fri, 19 Apr 2019 10:20:43 -0700
Subject: [PATCH] Fix ord() when dealing with utf8 chars (#19423)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/19423
ghimport-source-id: e7449489fbc86ec1116f94027b3c1561942413ee

Reviewed By: eellison

Differential Revision: D15002847

Pulled By: driazati

fbshipit-source-id: 4560cebcfca695447423d48d65ed364e7dbdbedb
---
 test/test_jit.py                     | 7 +++++++
 torch/csrc/jit/register_prim_ops.cpp | 3 ++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/test/test_jit.py b/test/test_jit.py
index f20cfda..a995baa 100644
--- a/test/test_jit.py
+++ b/test/test_jit.py
@@ -11022,6 +11022,13 @@ a")
         self.checkScript(fn, ("h"))
         self.checkScript(fn, ("y"))
 
+        def index_str_to_tensor(s):
+            # type: (str) -> int
+            return torch.tensor(ord(s))
+
+        s = u'\u00a3'.encode('utf8')[:1]
+        self.checkScript(index_str_to_tensor, (s,))
+
+
     def test_string_slicing(self):
         def fn1(x):
             # type: (str) -> str
diff --git a/torch/csrc/jit/register_prim_ops.cpp b/torch/csrc/jit/register_prim_ops.cpp
index a579556..d36934d 100644
--- a/torch/csrc/jit/register_prim_ops.cpp
+++ b/torch/csrc/jit/register_prim_ops.cpp
@@ -1825,7 +1825,8 @@ RegisterOperators reg2({
               string.size() == 1,
               "String for ord() must be 1 character, found",
               string.size());
-          push(stack, int64_t(string.at(0)));
+          uint8_t ord = string.at(0);
+          push(stack, int64_t(ord));
           return 0;
         }),
 #define CREATE_COPY_OP(other_type, c_type) \
-- 
2.7.4