+2018-08-16 Bernd Edlinger <bernd.edlinger@hotmail.de>
+
+ * builtins.c (c_strlen): Add new parameter eltsize. Use it
+ for determining how to count the elements.
+ * builtins.h (c_strlen): Adjust prototype.
+ * expr.c (string_constant): Add new parameter mem_size.
+ Set *mem_size appropriately.
+ * expr.h (string_constant): Adjust prototype.
+ * gimple-fold.c (get_range_strlen): Add new parameter eltsize.
+ * gimple-fold.h (get_range_strlen): Adjust prototype.
+ * gimple-ssa-sprintf.c (get_string_length): Add new parameter eltsize.
+ (format_string): Call get_string_length with eltsize.
+
2018-08-16 David Malcolm <dmalcolm@redhat.com>
* diagnostic.c (default_diagnostic_start_span_fn): Call pp_string
accesses. Note that this implies the result is not going to be emitted
into the instruction stream.
- The value returned is of type `ssizetype'.
+ ELTSIZE is 1 for normal single byte character strings, and 2 or
+ 4 for wide character strings. ELTSIZE is by default 1.
- Unfortunately, string_constant can't access the values of const char
- arrays with initializers, so neither can we do so here. */
+ The value returned is of type `ssizetype'. */
tree
-c_strlen (tree src, int only_value)
+c_strlen (tree src, int only_value, unsigned eltsize)
{
+ gcc_assert (eltsize == 1 || eltsize == 2 || eltsize == 4);
STRIP_NOPS (src);
if (TREE_CODE (src) == COND_EXPR
&& (only_value || !TREE_SIDE_EFFECTS (TREE_OPERAND (src, 0))))
{
tree len1, len2;
- len1 = c_strlen (TREE_OPERAND (src, 1), only_value);
- len2 = c_strlen (TREE_OPERAND (src, 2), only_value);
+ len1 = c_strlen (TREE_OPERAND (src, 1), only_value, eltsize);
+ len2 = c_strlen (TREE_OPERAND (src, 2), only_value, eltsize);
if (tree_int_cst_equal (len1, len2))
return len1;
}
if (TREE_CODE (src) == COMPOUND_EXPR
&& (only_value || !TREE_SIDE_EFFECTS (TREE_OPERAND (src, 0))))
- return c_strlen (TREE_OPERAND (src, 1), only_value);
+ return c_strlen (TREE_OPERAND (src, 1), only_value, eltsize);
location_t loc = EXPR_LOC_OR_LOC (src, input_location);
/* Offset from the beginning of the string in bytes. */
tree byteoff;
- src = string_constant (src, &byteoff);
+ tree memsize;
+ src = string_constant (src, &byteoff, &memsize);
if (src == 0)
return NULL_TREE;
/* Determine the size of the string element. */
- unsigned eltsize
- = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (src))));
+ if (eltsize != tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (src)))))
+ return NULL_TREE;
/* Set MAXELTS to sizeof (SRC) / sizeof (*SRC) - 1, the maximum possible
length of SRC. Prefer TYPE_SIZE() to TREE_STRING_LENGTH() if possible
HOST_WIDE_INT strelts = TREE_STRING_LENGTH (src);
strelts = strelts / eltsize - 1;
- HOST_WIDE_INT maxelts = strelts;
- tree type = TREE_TYPE (src);
- if (tree size = TYPE_SIZE_UNIT (type))
- if (tree_fits_shwi_p (size))
- {
- maxelts = tree_to_uhwi (size);
- maxelts = maxelts / eltsize - 1;
- }
+ if (!tree_fits_uhwi_p (memsize))
+ return NULL_TREE;
+
+ HOST_WIDE_INT maxelts = tree_to_uhwi (memsize) / eltsize - 1;
/* PTR can point to the byte representation of any string type, including
char* and wchar_t*. */
if (byteoff && TREE_CODE (byteoff) != INTEGER_CST)
{
+ /* For empty strings the result should be zero. */
+ if (maxelts == 0)
+ return ssize_int (0);
+
+ /* The code below works only for single byte character types. */
+ if (eltsize != 1)
+ return NULL_TREE;
+
/* If the string has an internal NUL character followed by any
non-NUL characters (e.g., "foo\0bar"), we can't compute
the offset to the following NUL if we don't know where to
start searching for it. */
unsigned len = string_length (ptr, eltsize, strelts);
- if (len < strelts)
- {
- /* Return when an embedded null character is found. */
- return NULL_TREE;
- }
- if (!maxelts)
- return ssize_int (0);
+ /* Return when an embedded null character is found or none at all. */
+ if (len < strelts || len > maxelts)
+ return NULL_TREE;
/* We don't know the starting offset, but we do know that the string
has no internal zero bytes. If the offset falls within the bounds
tree offsave = TREE_SIDE_EFFECTS (byteoff) ? save_expr (byteoff) : byteoff;
offsave = fold_convert (ssizetype, offsave);
tree condexp = fold_build2_loc (loc, LE_EXPR, boolean_type_node, offsave,
- build_int_cst (ssizetype, len * eltsize));
- tree lenexp = size_diffop_loc (loc, ssize_int (strelts * eltsize), offsave);
+ build_int_cst (ssizetype, len));
+ tree lenexp = size_diffop_loc (loc, ssize_int (strelts), offsave);
return fold_build3_loc (loc, COND_EXPR, ssizetype, condexp, lenexp,
build_zero_cst (ssizetype));
}
return NULL_TREE;
}
+ /* If eltoff is larger than strelts but less than maxelts the
+ string length is zero, since the excess memory will be zero. */
+ if (eltoff > strelts)
+ return ssize_int (0);
+
/* Use strlen to search for the first zero byte. Since any strings
constructed with build_string will have nulls appended, we win even
if we get handed something like (char[4])"abcd".
Since ELTOFF is our starting index into the string, no further
calculation is needed. */
unsigned len = string_length (ptr + eltoff * eltsize, eltsize,
- maxelts - eltoff);
+ strelts - eltoff);
return ssize_int (len);
}
unsigned HOST_WIDE_INT *);
extern unsigned int get_pointer_alignment (tree);
extern unsigned string_length (const void*, unsigned, unsigned);
-extern tree c_strlen (tree, int);
+extern tree c_strlen (tree, int, unsigned = 1);
extern void expand_builtin_setjmp_setup (rtx, rtx);
extern void expand_builtin_setjmp_receiver (rtx);
extern void expand_builtin_update_setjmp_buf (rtx);
/* Return the tree node if an ARG corresponds to a string constant or zero
if it doesn't. If we return nonzero, set *PTR_OFFSET to the (possibly
non-constant) offset in bytes within the string that ARG is accessing.
- The type of the offset is sizetype. */
+ The type of the offset is sizetype. If MEM_SIZE is non-null, the storage
+ size of the memory is stored in *MEM_SIZE. If MEM_SIZE is null, the string
+ is only returned when it is properly zero terminated. */
tree
-string_constant (tree arg, tree *ptr_offset)
+string_constant (tree arg, tree *ptr_offset, tree *mem_size)
{
tree array;
STRIP_NOPS (arg);
return NULL_TREE;
tree offset;
- if (tree str = string_constant (arg0, &offset))
+ if (tree str = string_constant (arg0, &offset, mem_size))
{
/* Avoid pointers to arrays (see bug 86622). */
if (POINTER_TYPE_P (TREE_TYPE (arg))
if (TREE_CODE (array) == STRING_CST)
{
*ptr_offset = fold_convert (sizetype, offset);
+ if (mem_size)
+ *mem_size = TYPE_SIZE_UNIT (TREE_TYPE (array));
return array;
}
unsigned HOST_WIDE_INT length = TREE_STRING_LENGTH (init);
length = string_length (TREE_STRING_POINTER (init), charsize,
length / charsize);
- if (compare_tree_int (array_size, length + 1) < 0)
+ if (mem_size)
+ *mem_size = TYPE_SIZE_UNIT (TREE_TYPE (init));
+ else if (compare_tree_int (array_size, length + 1) < 0)
return NULL_TREE;
*ptr_offset = offset;
/* Return the tree node and offset if a given argument corresponds to
a string constant. */
-extern tree string_constant (tree, tree *);
+extern tree string_constant (tree, tree *, tree * = NULL);
/* Two different ways of generating switch statements. */
extern int try_casesi (tree, tree, tree, tree, rtx, rtx, rtx, profile_probability);
Set *FLEXP to true if the range of the string lengths has been
obtained from the upper bound of an array at the end of a struct.
Such an array may hold a string that's longer than its upper bound
- due to it being used as a poor-man's flexible array member. */
+ due to it being used as a poor-man's flexible array member.
+ ELTSIZE is 1 for normal single byte character strings, and 2 or
+ 4 for wide character strings. ELTSIZE is by default 1. */
static bool
get_range_strlen (tree arg, tree length[2], bitmap *visited, int type,
- int fuzzy, bool *flexp)
+ int fuzzy, bool *flexp, unsigned eltsize = 1)
{
tree var, val = NULL_TREE;
gimple *def_stmt;
tree aop0 = TREE_OPERAND (op, 0);
if (TREE_CODE (aop0) == INDIRECT_REF
&& TREE_CODE (TREE_OPERAND (aop0, 0)) == SSA_NAME)
- return get_range_strlen (TREE_OPERAND (aop0, 0),
- length, visited, type, fuzzy, flexp);
+ return get_range_strlen (TREE_OPERAND (aop0, 0), length,
+ visited, type, fuzzy, flexp, eltsize);
}
else if (TREE_CODE (TREE_OPERAND (op, 0)) == COMPONENT_REF && fuzzy)
{
return false;
}
else
- val = c_strlen (arg, 1);
+ val = c_strlen (arg, 1, eltsize);
if (!val && fuzzy)
{
if (TREE_CODE (arg) == ADDR_EXPR)
return get_range_strlen (TREE_OPERAND (arg, 0), length,
- visited, type, fuzzy, flexp);
+ visited, type, fuzzy, flexp, eltsize);
if (TREE_CODE (arg) == ARRAY_REF)
{
|| gimple_assign_unary_nop_p (def_stmt))
{
tree rhs = gimple_assign_rhs1 (def_stmt);
- return get_range_strlen (rhs, length, visited, type, fuzzy, flexp);
+ return get_range_strlen (rhs, length, visited, type, fuzzy, flexp,
+ eltsize);
}
else if (gimple_assign_rhs_code (def_stmt) == COND_EXPR)
{
for (unsigned int i = 0; i < 2; i++)
if (!get_range_strlen (ops[i], length, visited, type, fuzzy,
- flexp))
+ flexp, eltsize))
{
if (fuzzy == 2)
*maxlen = build_all_ones_cst (size_type_node);
if (arg == gimple_phi_result (def_stmt))
continue;
- if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp))
+ if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp,
+ eltsize))
{
if (fuzzy == 2)
*maxlen = build_all_ones_cst (size_type_node);
and false if PHIs and COND_EXPRs are to be handled optimistically,
if we can determine string length minimum and maximum; it will use
the minimum from the ones where it can be determined.
- STRICT false should be only used for warning code. */
+ STRICT false should be only used for warning code.
+
+ ELTSIZE is 1 for normal single byte character strings, and 2 or
+ 4 for wide character strings. ELTSIZE is by default 1. */
bool
-get_range_strlen (tree arg, tree minmaxlen[2], bool strict)
+get_range_strlen (tree arg, tree minmaxlen[2], unsigned eltsize, bool strict)
{
bitmap visited = NULL;
bool flexarray = false;
if (!get_range_strlen (arg, minmaxlen, &visited, 1, strict ? 1 : 2,
- &flexarray))
+ &flexarray, eltsize))
{
minmaxlen[0] = NULL_TREE;
minmaxlen[1] = NULL_TREE;
wide_int maxlen;
tree lenrange[2];
- if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange, true)
+ if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange, 1, true)
&& lenrange[0] && TREE_CODE (lenrange[0]) == INTEGER_CST
&& lenrange[1] && TREE_CODE (lenrange[1]) == INTEGER_CST)
{
extern tree create_tmp_reg_or_ssa_name (tree, gimple *stmt = NULL);
extern tree canonicalize_constructor_val (tree, tree);
extern tree get_symbol_constant_value (tree);
-extern bool get_range_strlen (tree, tree[2], bool = false);
+extern bool get_range_strlen (tree, tree[2], unsigned = 1, bool = false);
extern tree get_maxval_strlen (tree, int);
extern void gimplify_and_update_call_from_tree (gimple_stmt_iterator *, tree);
extern bool fold_stmt (gimple_stmt_iterator *);
Used by the format_string function below. */
static fmtresult
-get_string_length (tree str)
+get_string_length (tree str, unsigned eltsize)
{
if (!str)
return fmtresult ();
- if (tree slen = c_strlen (str, 1))
+ if (tree slen = c_strlen (str, 1, eltsize))
{
/* Simply return the length of the string. */
fmtresult res (tree_to_shwi (slen));
aren't known to point any such arrays result in LENRANGE[1] set
to SIZE_MAX. */
tree lenrange[2];
- bool flexarray = get_range_strlen (str, lenrange);
+ bool flexarray = get_range_strlen (str, lenrange, eltsize);
if (lenrange [0] || lenrange [1])
{
return res;
}
- return get_string_length (NULL_TREE);
+ return fmtresult ();
}
/* Return the minimum and maximum number of characters formatted
fmtresult res;
/* Compute the range the argument's length can be in. */
- fmtresult slen = get_string_length (arg);
+ int count_by = dir.specifier == 'S' || dir.modifier == FMT_LEN_l ? 4 : 1;
+ fmtresult slen = get_string_length (arg, count_by);
if (slen.range.min == slen.range.max
&& slen.range.min < HOST_WIDE_INT_MAX)
{