From 33d64ca5db656f1f377de18f94403d8b3b91e3a1 Mon Sep 17 00:00:00 2001 From: Fredrik Noring Date: Fri, 19 Oct 2018 09:47:55 +0100 Subject: [PATCH] This set of changes clarifies the conditions for the R5900 short loop fix and extends its test with the border cases of six and seven instructions. * testsuite/gas/mips/r5900.s: Extend the R5900 short loop fix test with border cases. * testsuite/gas/mips/r5900.d: Add extra expected disassembly. * config/tc-mips.c (can_swap_branch_p): Clarify the R5900 short loop hardware bug conditions. Correct note on the R5900 instruction count short loop fix. --- gas/ChangeLog | 9 +++++++++ gas/config/tc-mips.c | 22 +++++++++++++++++----- gas/testsuite/gas/mips/r5900.d | 18 +++++++++++++++++- gas/testsuite/gas/mips/r5900.s | 32 ++++++++++++++++++++++++++++---- 4 files changed, 71 insertions(+), 10 deletions(-) diff --git a/gas/ChangeLog b/gas/ChangeLog index 7ba30cb..ff9de52 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,12 @@ +2018-10-19 Fredrik Noring + + * testsuite/gas/mips/r5900.s: Extend the R5900 short loop fix + test with border cases. + * testsuite/gas/mips/r5900.d: Add extra expected disassembly. + * config/tc-mips.c (can_swap_branch_p): Clarify the R5900 short + loop hardware bug conditions. Correct note on the R5900 + instruction count short loop fix. + 2018-10-16 Matthew Malcomson * testsuite/gas/aarch64/illegal-dotproduct.d: New test. diff --git a/gas/config/tc-mips.c b/gas/config/tc-mips.c index c9fc6c6..918525b 100644 --- a/gas/config/tc-mips.c +++ b/gas/config/tc-mips.c @@ -6982,9 +6982,21 @@ can_swap_branch_p (struct mips_cl_insn *ip, expressionS *address_expr, && insn_length (history) != 4) return FALSE; - /* On R5900 short loops need to be fixed by inserting a nop in - the branch delay slots. - A short loop can be terminated too early. */ + /* On the R5900 short loops need to be fixed by inserting a NOP in the + branch delay slot. 
+ + Under certain conditions the short loop bug causes loops to execute + only once or twice. We must ensure that the assembler never + generates loops that satisfy all of the following conditions: + + - the loop consists of six or fewer instructions + (including the branch delay slot); + - the loop contains only one conditional branch instruction, at the + end of the loop; + - the loop does not contain any other branch or jump instructions; + - the branch delay slot of the loop is not a NOP (EE 2.9 or later). + + We need to do this because of a hardware bug in the R5900 chip. */ if (mips_opts.arch == CPU_R5900 /* Check if instruction has a parameter, ignore "j $31". */ && (address_expr != NULL) @@ -7002,8 +7014,8 @@ can_swap_branch_p (struct mips_cl_insn *ip, expressionS *address_expr, || (ip->insn_opcode & 0xffff0000) == 0x04110000)) /* bgezal $0 */ { int distance; - /* Check if loop is shorter than 6 instructions including - branch and delay slot. */ + /* Check if the loop is 6 instructions or shorter, including + the branch and its delay slot.
*/ distance = frag_now_fix () - S_GET_VALUE (address_expr->X_add_symbol); if (distance <= 20) { diff --git a/gas/testsuite/gas/mips/r5900.d b/gas/testsuite/gas/mips/r5900.d index 7ef9a8a..082c204 100644 --- a/gas/testsuite/gas/mips/r5900.d +++ b/gas/testsuite/gas/mips/r5900.d @@ -87,7 +87,23 @@ Disassembly of section \.text: [0-9a-f]+ <[^>]*> 2403012c li \$3,300 [0-9a-f]+ <[^>]*> 2063ffff addi \$3,\$3,-1 [0-9a-f]+ <[^>]*> 2084ffff addi \$4,\$4,-1 -[0-9a-f]+ <[^>]*> 1460fffd bnez \$3,[0-9a-f]+ +[0-9a-f]+ <[^>]*> 1460fffd bnez \$3,[0-9a-f]+ [0-9a-f]+ <[^>]*> 00000000 nop +[0-9a-f]+ <[^>]*> 2403012c li \$3,300 +[0-9a-f]+ <[^>]*> 2063ffff addi \$3,\$3,-1 +[0-9a-f]+ <[^>]*> 2084ffff addi \$4,\$4,-1 +[0-9a-f]+ <[^>]*> 20a5ffff addi \$5,\$5,-1 +[0-9a-f]+ <[^>]*> 20c6ffff addi \$6,\$6,-1 +[0-9a-f]+ <[^>]*> 20e7ffff addi \$7,\$7,-1 +[0-9a-f]+ <[^>]*> 1460fffa bnez \$3,[0-9a-f]+ +[0-9a-f]+ <[^>]*> 00000000 nop +[0-9a-f]+ <[^>]*> 2403012c li \$3,300 +[0-9a-f]+ <[^>]*> 2063ffff addi \$3,\$3,-1 +[0-9a-f]+ <[^>]*> 2084ffff addi \$4,\$4,-1 +[0-9a-f]+ <[^>]*> 20a5ffff addi \$5,\$5,-1 +[0-9a-f]+ <[^>]*> 20c6ffff addi \$6,\$6,-1 +[0-9a-f]+ <[^>]*> 20e7ffff addi \$7,\$7,-1 +[0-9a-f]+ <[^>]*> 1460fffa bnez \$3,[0-9a-f]+ +[0-9a-f]+ <[^>]*> 2108ffff addi \$8,\$8,-1 [0-9a-f]+ <[^>]*> 24040003 li \$4,3 \.\.\. diff --git a/gas/testsuite/gas/mips/r5900.s b/gas/testsuite/gas/mips/r5900.s index 3a16e28..9d16b25 100644 --- a/gas/testsuite/gas/mips/r5900.s +++ b/gas/testsuite/gas/mips/r5900.s @@ -120,13 +120,37 @@ stuff: .set pop .set push .set reorder - # Short loop fix. + # Test the short loop fix with 3 loop instructions. li $3, 300 -short_loop1: +short_loop3: addi $3, -1 addi $4, -1 - # NOP should be inserted in branch delay. - bne $3, $0, short_loop1 + # A NOP will be inserted in the branch delay slot. + bne $3, $0, short_loop3 + + # Test the short loop fix with 6 loop instructions. 
+ li $3, 300 +short_loop6: + addi $3, -1 + addi $4, -1 + addi $5, -1 + addi $6, -1 + addi $7, -1 + # A NOP will be inserted in the branch delay slot. + bne $3, $0, short_loop6 + + # Test the short loop fix with 7 loop instructions. + li $3, 300 +short_loop7: + addi $3, -1 + addi $4, -1 + addi $5, -1 + addi $6, -1 + addi $7, -1 + addi $8, -1 + # The short loop fix does not apply for loops with + # more than 6 instructions. + bne $3, $0, short_loop7 li $4, 3 .set pop -- 2.7.4