From: David Riazati
Date: Fri, 19 Apr 2019 17:20:43 +0000 (-0700)
Subject: Fix ord() when dealing with utf8 chars (#19423)
X-Git-Tag: accepted/tizen/6.5/unified/20211028.231830~126
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e7b9526dc6f23c89001b407564ff3b8160aedbbf;p=platform%2Fupstream%2Fpytorch.git

Fix ord() when dealing with utf8 chars (#19423)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/19423
ghimport-source-id: e7449489fbc86ec1116f94027b3c1561942413ee

Reviewed By: eellison

Differential Revision: D15002847

Pulled By: driazati

fbshipit-source-id: 4560cebcfca695447423d48d65ed364e7dbdbedb
---

diff --git a/test/test_jit.py b/test/test_jit.py
index f20cfda..a995baa 100644
--- a/test/test_jit.py
+++ b/test/test_jit.py
@@ -11022,6 +11022,13 @@ a")
         self.checkScript(fn, ("h"))
         self.checkScript(fn, ("y"))
 
+        def index_str_to_tensor(s):
+            # type: (str) -> int
+            return torch.tensor(ord(s))
+
+        s = u'\u00a3'.encode('utf8')[:1]
+        self.checkScript(index_str_to_tensor, (s,))
+
     def test_string_slicing(self):
         def fn1(x):
             # type: (str) -> str
diff --git a/torch/csrc/jit/register_prim_ops.cpp b/torch/csrc/jit/register_prim_ops.cpp
index a579556..d36934d 100644
--- a/torch/csrc/jit/register_prim_ops.cpp
+++ b/torch/csrc/jit/register_prim_ops.cpp
@@ -1825,7 +1825,8 @@ RegisterOperators reg2({
               string.size() == 1,
               "String for ord() must be 1 character, found",
               string.size());
-          push(stack, int64_t(string.at(0)));
+          uint8_t ord = string.at(0);
+          push(stack, int64_t(ord));
           return 0;
         }),
 #define CREATE_COPY_OP(other_type, c_type) \