In some cases, plain scalars are currently parsed with a trailing
newline. This shows up particularly often when parsing JSON files; for
example, note the `\n` after `456` below:
```
$ cat test.yaml
{
"foo": 123,
"bar": 456
}
$ yaml-bench test.yaml -canonical
%YAML 1.2
---
!!map {
? !!str "foo"
: !!str "123",
? !!str "bar"
: !!str "456\n",
}
...
```
The trailing whitespace causes the conversion of the scalar to
int/bool/etc. to fail, which leads to the issue seen here:
https://github.com/llvm/llvm-project/issues/15877
According to the YAML spec (https://yaml.org/spec/1.2.2/#733-plain-style),
plain scalars should never end with whitespace, so this change trims
all trailing whitespace characters from the value (specifically
`b-line-feed`, `b-carriage-return`, `s-space`, and `s-tab`).
Reviewed By: scott.linder
Differential Revision: https://reviews.llvm.org/D137118
}
return UnquotedValue;
}
- // Plain or block.
- return Value.rtrim(' ');
+ // Plain.
+ // Trim whitespace ('b-char' and 's-white').
+ // NOTE: Alternatively we could change the scanner to not include whitespace
+ // here in the first place.
+ return Value.rtrim("\x0A\x0D\x20\x09");
}
StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
--- /dev/null
+# RUN: yaml-bench -canonical %s | FileCheck %s
+
+# CHECK: !!map {
+# CHECK: ? !!str "foo"
+# CHECK: : !!str "123",
+# CHECK: ? !!str "bar"
+# CHECK: : !!str "456",
+# CHECK: }
+
+{
+ "foo": 123,
+ "bar": 456
+}
EXPECT_EQ(doc.foo, 3);
EXPECT_EQ(doc.bar, 5);
}
+
+ {
+ Input yin("{\"foo\": 3\n, \"bar\": 5}");
+ yin >> doc;
+
+ EXPECT_FALSE(yin.error());
+ EXPECT_EQ(doc.foo, 3);
+ EXPECT_EQ(doc.bar, 5);
+ }
}
TEST(YAMLIO, TestMalformedMapRead) {