multiple definition of 'yylloc' #4

Open
ghost opened this issue Jun 26, 2021 · 0 comments

ghost commented Jun 26, 2021

Terminal output:

/linux-stable # make -j16
HOSTLD scripts/dtc/dtc
/usr/lib/gcc/aarch64-linux-musl/11.1.0/../../../../aarch64-linux-musl/bin/ld: scripts/dtc/dtc-parser.tab.o:(.bss+0x8): multiple definition of `yylloc'; scripts/dtc/dtc-lexer.lex.o:(.bss+0x38): first defined here
collect2: error: ld returned 1 exit status
make[1]: *** [scripts/Makefile.host:99: scripts/dtc/dtc] Error 1
make: *** [Makefile:1265: scripts_dtc] Error 2
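
This is a known consequence of building older kernel trees with GCC 10 or
newer, which defaults to -fno-common: both the bison-generated
dtc-parser.tab.o and the flex-generated dtc-lexer.lex.o end up carrying a
definition of yylloc, and the linker now rejects the duplicate.  A minimal
sketch of the usual fix (assuming the duplicate lives in
scripts/dtc/dtc-lexer.l; the exact location differs between kernel
versions) is to turn the lexer's copy into a reference to the parser's
definition, or drop the line entirely:

  /* scripts/dtc/dtc-lexer.l, C prologue -- sketch only, not the exact diff */
  #include "dtc-parser.tab.h"

  /*
   * Before: a second definition that clashes under -fno-common:
   *
   *     YYLTYPE yylloc;
   *
   * After: refer to the definition emitted by bison instead.
   */
  extern YYLTYPE yylloc;

Backporting the corresponding upstream scripts/dtc cleanup, or adding
-fcommon to the host compiler flags, are the commonly reported workarounds.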

jnettlet pushed a commit that referenced this issue Jul 16, 2021
The "auxtrace_info" and "auxtrace" functions are not set in "tool" member of
"annotate". As a result, perf annotate does not support parsing itrace data.

Before:

  # perf record -e arm_spe_0/branch_filter=1/ -a sleep 1
  [ perf record: Woken up 9 times to write data ]
  [ perf record: Captured and wrote 20.874 MB perf.data ]
  # perf annotate --stdio
  Error:
  The perf.data data has no samples!

Solution:

1. Add itrace options to the help text,
2. Set the "id_index", "auxtrace_info" and "auxtrace" hook functions in
   perf_tool (a sketch follows below).
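
As a rough illustration of step 2 (a sketch, not the exact patch), the
generic handlers already used by perf-report/perf-script can be wired into
the tool embedded in the annotate command, roughly:

  /* tools/perf/builtin-annotate.c -- sketch of wiring the itrace hooks */
  struct perf_annotate annotate = {
  	.tool = {
  		.sample		= process_sample_event,
  		/* ... existing callbacks left as they are ... */
  		.id_index	= perf_event__process_id_index,
  		.auxtrace_info	= perf_event__process_auxtrace_info,
  		.auxtrace	= perf_event__process_auxtrace,
  	},
  };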

After:

  # perf record --all-user -e arm_spe_0/branch_filter=1/ ls
  Couldn't synthesize bpf events.
  perf.data
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.010 MB perf.data ]
  # perf annotate --stdio
   Percent |      Source code & Disassembly of libc-2.28.so for branch-miss (1 samples, percent: local period)
  ------------------------------------------------------------------------------------------------------------
           :
           :
           :
           :           Disassembly of section .text:
           :
           :           0000000000066180 <__getdelim@@GLIBC_2.17>:
      0.00 :   66180:  stp     x29, x30, [sp, #-96]!
      0.00 :   66184:  cmp     x0, #0x0
      0.00 :   66188:  ccmp    x1, #0x0, #0x4, ne  // ne = any
      0.00 :   6618c:  mov     x29, sp
      0.00 :   66190:  stp     x24, x25, [sp, #56]
      0.00 :   66194:  stp     x26, x27, [sp, #72]
      0.00 :   66198:  str     x28, [sp, #88]
      0.00 :   6619c:  b.eq    66450 <__getdelim@@GLIBC_2.17+0x2d0>  // b.none
      0.00 :   661a0:  stp     x22, x23, [x29, #40]
      0.00 :   661a4:  mov     x22, x1
      0.00 :   661a8:  ldr     w1, [x3]
      0.00 :   661ac:  mov     w23, w2
      0.00 :   661b0:  stp     x20, x21, [x29, #24]
      0.00 :   661b4:  mov     x20, x3
      0.00 :   661b8:  mov     x21, x0
      0.00 :   661bc:  tbnz    w1, #15, 66360 <__getdelim@@GLIBC_2.17+0x1e0>
      0.00 :   661c0:  ldr     x0, [x3, #136]
      0.00 :   661c4:  ldr     x2, [x0, #8]
      0.00 :   661c8:  str     x19, [x29, #16]
      0.00 :   661cc:  mrs     x19, tpidr_el0
      0.00 :   661d0:  sub     x19, x19, #0x700
      0.00 :   661d4:  cmp     x2, x19
      0.00 :   661d8:  b.eq    663f0 <__getdelim@@GLIBC_2.17+0x270>  // b.none
      0.00 :   661dc:  mov     w1, #0x1                        // #1
      0.00 :   661e0:  ldaxr   w2, [x0]
      0.00 :   661e4:  cmp     w2, #0x0
      0.00 :   661e8:  b.ne    661f4 <__getdelim@@GLIBC_2.17+0x74>  // b.any
      0.00 :   661ec:  stxr    w3, w1, [x0]
      0.00 :   661f0:  cbnz    w3, 661e0 <__getdelim@@GLIBC_2.17+0x60>
      0.00 :   661f4:  b.ne    66448 <__getdelim@@GLIBC_2.17+0x2c8>  // b.any
      0.00 :   661f8:  ldr     x0, [x20, #136]
      0.00 :   661fc:  ldr     w1, [x20]
      0.00 :   66200:  ldr     w2, [x0, #4]
      0.00 :   66204:  str     x19, [x0, #8]
      0.00 :   66208:  add     w2, w2, #0x1
      0.00 :   6620c:  str     w2, [x0, #4]
      0.00 :   66210:  tbnz    w1, #5, 66388 <__getdelim@@GLIBC_2.17+0x208>
      0.00 :   66214:  ldr     x19, [x29, #16]
      0.00 :   66218:  ldr     x0, [x21]
      0.00 :   6621c:  cbz     x0, 66228 <__getdelim@@GLIBC_2.17+0xa8>
      0.00 :   66220:  ldr     x0, [x22]
      0.00 :   66224:  cbnz    x0, 6623c <__getdelim@@GLIBC_2.17+0xbc>
      0.00 :   66228:  mov     x0, #0x78                       // #120
      0.00 :   6622c:  str     x0, [x22]
      0.00 :   66230:  bl      20710 <malloc@plt>
      0.00 :   66234:  str     x0, [x21]
      0.00 :   66238:  cbz     x0, 66428 <__getdelim@@GLIBC_2.17+0x2a8>
      0.00 :   6623c:  ldr     x27, [x20, #8]
      0.00 :   66240:  str     x19, [x29, #16]
      0.00 :   66244:  ldr     x19, [x20, #16]
      0.00 :   66248:  sub     x19, x19, x27
      0.00 :   6624c:  cmp     x19, #0x0
      0.00 :   66250:  b.le    66398 <__getdelim@@GLIBC_2.17+0x218>
      0.00 :   66254:  mov     x25, #0x0                       // #0
      0.00 :   66258:  b       662d8 <__getdelim@@GLIBC_2.17+0x158>
      0.00 :   6625c:  nop
      0.00 :   66260:  add     x24, x19, x25
      0.00 :   66264:  ldr     x3, [x22]
      0.00 :   66268:  add     x26, x24, #0x1
      0.00 :   6626c:  ldr     x0, [x21]
      0.00 :   66270:  cmp     x3, x26
      0.00 :   66274:  b.cs    6629c <__getdelim@@GLIBC_2.17+0x11c>  // b.hs, b.nlast
      0.00 :   66278:  lsl     x3, x3, #1
      0.00 :   6627c:  cmp     x3, x26
      0.00 :   66280:  csel    x26, x3, x26, cs  // cs = hs, nlast
      0.00 :   66284:  mov     x1, x26
      0.00 :   66288:  bl      206f0 <realloc@plt>
      0.00 :   6628c:  cbz     x0, 66438 <__getdelim@@GLIBC_2.17+0x2b8>
      0.00 :   66290:  str     x0, [x21]
      0.00 :   66294:  ldr     x27, [x20, #8]
      0.00 :   66298:  str     x26, [x22]
      0.00 :   6629c:  mov     x2, x19
      0.00 :   662a0:  mov     x1, x27
      0.00 :   662a4:  add     x0, x0, x25
      0.00 :   662a8:  bl      87390 <explicit_bzero@@GLIBC_2.25+0x50>
      0.00 :   662ac:  ldr     x0, [x20, #8]
      0.00 :   662b0:  add     x19, x0, x19
      0.00 :   662b4:  str     x19, [x20, #8]
      0.00 :   662b8:  cbnz    x28, 66410 <__getdelim@@GLIBC_2.17+0x290>
      0.00 :   662bc:  mov     x0, x20
      0.00 :   662c0:  bl      73b80 <__underflow@@GLIBC_2.17>
      0.00 :   662c4:  cmn     w0, #0x1
      0.00 :   662c8:  b.eq    66410 <__getdelim@@GLIBC_2.17+0x290>  // b.none
      0.00 :   662cc:  ldp     x27, x19, [x20, #8]
      0.00 :   662d0:  mov     x25, x24
      0.00 :   662d4:  sub     x19, x19, x27
      0.00 :   662d8:  mov     x2, x19
      0.00 :   662dc:  mov     w1, w23
      0.00 :   662e0:  mov     x0, x27
      0.00 :   662e4:  bl      807b0 <memchr@@GLIBC_2.17>
      0.00 :   662e8:  cmp     x0, #0x0
      0.00 :   662ec:  mov     x28, x0
      0.00 :   662f0:  sub     x0, x0, x27
      0.00 :   662f4:  csinc   x19, x19, x0, eq  // eq = none
      0.00 :   662f8:  mov     x0, #0x7fffffffffffffff         // #9223372036854775807
      0.00 :   662fc:  sub     x0, x0, x25
      0.00 :   66300:  cmp     x19, x0
      0.00 :   66304:  b.lt    66260 <__getdelim@@GLIBC_2.17+0xe0>  // b.tstop
      0.00 :   66308:  adrp    x0, 17f000 <sys_sigabbrev@@GLIBC_2.17+0x320>
      0.00 :   6630c:  ldr     x0, [x0, #3624]
      0.00 :   66310:  mrs     x2, tpidr_el0
      0.00 :   66314:  ldr     x19, [x29, #16]
      0.00 :   66318:  mov     w3, #0x4b                       // #75
      0.00 :   6631c:  ldr     w1, [x20]
      0.00 :   66320:  mov     x24, #0xffffffffffffffff        // #-1
      0.00 :   66324:  str     w3, [x2, x0]
      0.00 :   66328:  tbnz    w1, #15, 66340 <__getdelim@@GLIBC_2.17+0x1c0>
      0.00 :   6632c:  ldr     x0, [x20, #136]
      0.00 :   66330:  ldr     w1, [x0, #4]
      0.00 :   66334:  sub     w1, w1, #0x1
      0.00 :   66338:  str     w1, [x0, #4]
      0.00 :   6633c:  cbz     w1, 663b8 <__getdelim@@GLIBC_2.17+0x238>
      0.00 :   66340:  mov     x0, x24
      0.00 :   66344:  ldr     x28, [sp, #88]
      0.00 :   66348:  ldp     x20, x21, [x29, #24]
      0.00 :   6634c:  ldp     x22, x23, [x29, #40]
      0.00 :   66350:  ldp     x24, x25, [sp, #56]
      0.00 :   66354:  ldp     x26, x27, [sp, #72]
      0.00 :   66358:  ldp     x29, x30, [sp], #96
      0.00 :   6635c:  ret
    100.00 :   66360:  tbz     w1, #5, 66218 <__getdelim@@GLIBC_2.17+0x98>
      0.00 :   66364:  ldp     x20, x21, [x29, #24]
      0.00 :   66368:  mov     x24, #0xffffffffffffffff        // #-1
      0.00 :   6636c:  ldp     x22, x23, [x29, #40]
      0.00 :   66370:  mov     x0, x24
      0.00 :   66374:  ldp     x24, x25, [sp, #56]
      0.00 :   66378:  ldp     x26, x27, [sp, #72]
      0.00 :   6637c:  ldr     x28, [sp, #88]
      0.00 :   66380:  ldp     x29, x30, [sp], #96
      0.00 :   66384:  ret
      0.00 :   66388:  mov     x24, #0xffffffffffffffff        // #-1
      0.00 :   6638c:  ldr     x19, [x29, #16]
      0.00 :   66390:  b       66328 <__getdelim@@GLIBC_2.17+0x1a8>
      0.00 :   66394:  nop
      0.00 :   66398:  mov     x0, x20
      0.00 :   6639c:  bl      73b80 <__underflow@@GLIBC_2.17>
      0.00 :   663a0:  cmn     w0, #0x1
      0.00 :   663a4:  b.eq    66438 <__getdelim@@GLIBC_2.17+0x2b8>  // b.none
      0.00 :   663a8:  ldp     x27, x19, [x20, #8]
      0.00 :   663ac:  sub     x19, x19, x27
      0.00 :   663b0:  b       66254 <__getdelim@@GLIBC_2.17+0xd4>
      0.00 :   663b4:  nop
      0.00 :   663b8:  str     xzr, [x0, #8]
      0.00 :   663bc:  ldxr    w2, [x0]
      0.00 :   663c0:  stlxr   w3, w1, [x0]
      0.00 :   663c4:  cbnz    w3, 663bc <__getdelim@@GLIBC_2.17+0x23c>
      0.00 :   663c8:  cmp     w2, #0x1
      0.00 :   663cc:  b.le    66340 <__getdelim@@GLIBC_2.17+0x1c0>
      0.00 :   663d0:  mov     x1, #0x81                       // #129
      0.00 :   663d4:  mov     x2, #0x1                        // #1
      0.00 :   663d8:  mov     x3, #0x0                        // #0
      0.00 :   663dc:  mov     x8, #0x62                       // #98
      0.00 :   663e0:  svc     #0x0
      0.00 :   663e4:  ldp     x20, x21, [x29, #24]
      0.00 :   663e8:  ldp     x22, x23, [x29, #40]
      0.00 :   663ec:  b       66370 <__getdelim@@GLIBC_2.17+0x1f0>
      0.00 :   663f0:  ldr     w2, [x0, #4]
      0.00 :   663f4:  add     w2, w2, #0x1
      0.00 :   663f8:  str     w2, [x0, #4]
      0.00 :   663fc:  tbz     w1, #5, 66214 <__getdelim@@GLIBC_2.17+0x94>
      0.00 :   66400:  mov     x24, #0xffffffffffffffff        // #-1
      0.00 :   66404:  ldr     x19, [x29, #16]
      0.00 :   66408:  b       66330 <__getdelim@@GLIBC_2.17+0x1b0>
      0.00 :   6640c:  nop
      0.00 :   66410:  ldr     x0, [x21]
      0.00 :   66414:  strb    wzr, [x0, x24]
      0.00 :   66418:  ldr     w1, [x20]
      0.00 :   6641c:  ldr     x19, [x29, #16]
      0.00 :   66420:  b       66328 <__getdelim@@GLIBC_2.17+0x1a8>
      0.00 :   66424:  nop
      0.00 :   66428:  mov     x24, #0xffffffffffffffff        // #-1
      0.00 :   6642c:  ldr     w1, [x20]
      0.00 :   66430:  b       66328 <__getdelim@@GLIBC_2.17+0x1a8>
      0.00 :   66434:  nop
      0.00 :   66438:  mov     x24, #0xffffffffffffffff        // #-1
      0.00 :   6643c:  ldr     w1, [x20]
      0.00 :   66440:  ldr     x19, [x29, #16]
      0.00 :   66444:  b       66328 <__getdelim@@GLIBC_2.17+0x1a8>
      0.00 :   66448:  bl      e3ba0 <pthread_setcanceltype@@GLIBC_2.17+0x30>
      0.00 :   6644c:  b       661f8 <__getdelim@@GLIBC_2.17+0x78>
      0.00 :   66450:  adrp    x0, 17f000 <sys_sigabbrev@@GLIBC_2.17+0x320>
      0.00 :   66454:  ldr     x0, [x0, #3624]
      0.00 :   66458:  mrs     x1, tpidr_el0
      0.00 :   6645c:  mov     w2, #0x16                       // #22
      0.00 :   66460:  mov     x24, #0xffffffffffffffff        // #-1
      0.00 :   66464:  str     w2, [x1, x0]
      0.00 :   66468:  b       66370 <__getdelim@@GLIBC_2.17+0x1f0>
      0.00 :   6646c:  ldr     w1, [x20]
      0.00 :   66470:  mov     x4, x0
      0.00 :   66474:  tbnz    w1, #15, 6648c <__getdelim@@GLIBC_2.17+0x30c>
      0.00 :   66478:  ldr     x0, [x20, #136]
      0.00 :   6647c:  ldr     w1, [x0, #4]
      0.00 :   66480:  sub     w1, w1, #0x1
      0.00 :   66484:  str     w1, [x0, #4]
      0.00 :   66488:  cbz     w1, 66494 <__getdelim@@GLIBC_2.17+0x314>
      0.00 :   6648c:  mov     x0, x4
      0.00 :   66490:  bl      20e40 <gnu_get_libc_version@@GLIBC_2.17+0x130>
      0.00 :   66494:  str     xzr, [x0, #8]
      0.00 :   66498:  ldxr    w2, [x0]
      0.00 :   6649c:  stlxr   w3, w1, [x0]
      0.00 :   664a0:  cbnz    w3, 66498 <__getdelim@@GLIBC_2.17+0x318>
      0.00 :   664a4:  cmp     w2, #0x1
      0.00 :   664a8:  b.le    6648c <__getdelim@@GLIBC_2.17+0x30c>
      0.00 :   664ac:  mov     x1, #0x81                       // #129
      0.00 :   664b0:  mov     x2, #0x1                        // #1
      0.00 :   664b4:  mov     x3, #0x0                        // #0
      0.00 :   664b8:  mov     x8, #0x62                       // #98
      0.00 :   664bc:  svc     #0x0
      0.00 :   664c0:  b       6648c <__getdelim@@GLIBC_2.17+0x30c>

Signed-off-by: Yang Jihong <[email protected]>
Tested-by: Leo Yan <[email protected]>
Acked-by: Adrian Hunter <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lore.kernel.org/lkml/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
jnettlet pushed a commit that referenced this issue Jul 16, 2021
Patch series "mm: thp: use generic THP migration for NUMA hinting fault", v3.

When THP NUMA fault support was added, THP migration was not supported
yet, so an ad hoc THP migration path was implemented in the NUMA fault
handling.  Since v4.14 THP migration has been supported, so it doesn't
make much sense to keep a separate THP migration implementation rather
than using the generic migration code.  It is definitely a maintenance
burden to keep two THP migration implementations for different code
paths, and it is more error prone.  Using the generic THP migration
implementation allows us to remove the duplicate code and some hacks
needed by the old ad hoc implementation.

A quick grep shows that x86_64, PowerPC (book3s), ARM64 and S390 support
both THP and NUMA balancing.  Most of them support THP migration, except
for S390.  Zi Yan tried to add THP migration support for S390 before, but
it was not accepted due to the design of the S390 PMD.  For the
discussion, please see: https://lkml.org/lkml/2018/4/27/953.

Per the discussion with Gerald Schaefer in v1, it is acceptable to skip
huge PMDs for S390 for now.

I saw there were some hacks around gup in the git history, but I didn't
figure out whether they have been removed or not, since I just found the
FOLL_NUMA code in the current gup implementation and it seems useful.

Patch #1 ~ #2 are preparation patches.
Patch #3 is the real meat.
Patch #4 ~ #6 keep the counters and behaviors consistent with before.
Patch #7 skips changing huge PMDs to prot_none if THP migration is not
supported.

Test
----
Did some tests to measure the latency of do_huge_pmd_numa_page.  The test
VM has 80 vcpus and 64G of memory.  The test creates 2 processes that
together consume 128G of memory, which incurs memory pressure and causes
THP splits.  It also creates 80 processes to hog CPU, and the memory
consumer processes are bound to different nodes periodically in order to
increase NUMA faults.

The below test script is used:

echo 3 > /proc/sys/vm/drop_caches

# Run stress-ng for 24 hours
./stress-ng/stress-ng --vm 2 --vm-bytes 64G --timeout 24h &
PID=$!

./stress-ng/stress-ng --cpu $NR_CPUS --timeout 24h &

# Wait for vm stressors forked
sleep 5

PID_1=`pgrep -P $PID | awk 'NR == 1'`
PID_2=`pgrep -P $PID | awk 'NR == 2'`

JOB1=`pgrep -P $PID_1`
JOB2=`pgrep -P $PID_2`

# Bind load jobs to different nodes periodically to force generate
# cross node memory access
while [ -d "/proc/$PID" ]
do
        taskset -apc 8 $JOB1
        taskset -apc 8 $JOB2
        sleep 300
        taskset -apc 58 $JOB1
        taskset -apc 58 $JOB2
        sleep 300
done

With the above test, the histogram of the latency of do_huge_pmd_numa_page
is as shown below.  Since the number of do_huge_pmd_numa_page calls varies
drastically for each run (probably due to the scheduler), I converted the
raw numbers to percentages.

                             patched               base
@us[stress-ng]:
[0]                          3.57%                 0.16%
[1]                          55.68%                18.36%
[2, 4)                       10.46%                40.44%
[4, 8)                       7.26%                 17.82%
[8, 16)                      21.12%                13.41%
[16, 32)                     1.06%                 4.27%
[32, 64)                     0.56%                 4.07%
[64, 128)                    0.16%                 0.35%
[128, 256)                   < 0.1%                < 0.1%
[256, 512)                   < 0.1%                < 0.1%
[512, 1K)                    < 0.1%                < 0.1%
[1K, 2K)                     < 0.1%                < 0.1%
[2K, 4K)                     < 0.1%                < 0.1%
[4K, 8K)                     < 0.1%                < 0.1%
[8K, 16K)                    < 0.1%                < 0.1%
[16K, 32K)                   < 0.1%                < 0.1%
[32K, 64K)                   < 0.1%                < 0.1%

Per the result, the patched kernel is even slightly better than the base
kernel.  I think this is because lock contention against THP split is
lower than in the base kernel due to the refactoring.

To exclude the effect of THP splits, I also tested without memory
pressure.  No obvious regression was spotted.  Below is the test result
*without* memory pressure.

                           patched                  base
@us[stress-ng]:
[0]                        7.97%                   18.4%
[1]                        69.63%                  58.24%
[2, 4)                     4.18%                   2.63%
[4, 8)                     0.22%                   0.17%
[8, 16)                    1.03%                   0.92%
[16, 32)                   0.14%                   < 0.1%
[32, 64)                   < 0.1%                  < 0.1%
[64, 128)                  < 0.1%                  < 0.1%
[128, 256)                 < 0.1%                  < 0.1%
[256, 512)                 0.45%                   1.19%
[512, 1K)                  15.45%                  17.27%
[1K, 2K)                   < 0.1%                  < 0.1%
[2K, 4K)                   < 0.1%                  < 0.1%
[4K, 8K)                   < 0.1%                  < 0.1%
[8K, 16K)                  0.86%                   0.88%
[16K, 32K)                 < 0.1%                  0.15%
[32K, 64K)                 < 0.1%                  < 0.1%
[64K, 128K)                < 0.1%                  < 0.1%
[128K, 256K)               < 0.1%                  < 0.1%

The series also survived a series of tests that exercise NUMA balancing
migrations by Mel.

This patch (of 7):

Add orig_pmd to struct vm_fault so that the "orig_pmd" parameter used by
the huge page fault handlers can be removed, just like its PTE
counterpart.
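
For reference, the shape of that change (a sketch based on the description
above, not the verbatim diff) is to let the huge-PMD value ride along in
struct vm_fault next to its PTE counterpart:

  /* include/linux/mm.h -- sketch of the vm_fault change described above */
  struct vm_fault {
  	/* ... */
  	pmd_t *pmd;			/* PMD entry of the fault */
  	pud_t *pud;
  	union {
  		pte_t orig_pte;		/* value of PTE at the time of fault */
  		pmd_t orig_pmd;		/* value of PMD at the time of fault,
  					 * so huge page fault handlers no
  					 * longer need an extra parameter */
  	};
  	/* ... */
  };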

Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Yang Shi <[email protected]>
Acked-by: Mel Gorman <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Zi Yan <[email protected]>
Cc: Huang Ying <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Gerald Schaefer <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Vasily Gorbik <[email protected]>
Cc: Christian Borntraeger <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
jnettlet pushed a commit that referenced this issue Jul 16, 2021
The lz4-compatible decompressor is simple.  The format is underspecified
and relies on EOF notification to determine when to stop.  The initramfs
buffer format[1] explicitly states that it can have an arbitrary amount of
zero padding.  Thus, when operating without a fill function, be extra
careful to ensure that sizes of less than 4 bytes, or apparently empty
chunk sizes, are treated as EOF.
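
A minimal sketch of the kind of check this implies in the no-fill path of
lib/decompress_unlz4.c (illustrative only; the helper and its name are
hypothetical, not the exact diff):

  /*
   * Sketch: when reading directly from the in-memory buffer (no fill
   * callback), zero padding appended after the compressed initrd must be
   * treated as end-of-data instead of being fed to LZ4 as another chunk.
   */
  static bool unlz4_input_exhausted(const u8 *inp, long remaining)
  {
  	/* Fewer than 4 bytes cannot even hold a chunk-size word. */
  	if (remaining < 4)
  		return true;

  	/* An apparently empty chunk means we ran into zero padding. */
  	if (get_unaligned_le32(inp) == 0)
  		return true;

  	return false;
  }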

To test this I created two cpio initrds: first a normal one, main.cpio,
and a second one, second.cpio, containing just a single /test-file with
content "second".  Then I compressed both of them with gzip and with
lz4 -l.  Then I created 4 bytes of padding (dd if=/dev/zero of=pad4 bs=1
count=4), to create four testcase initrds:

 1) main.cpio.gzip + extra.cpio.gzip = pad0.gzip
 2) main.cpio.lz4  + extra.cpio.lz4 = pad0.lz4
 3) main.cpio.gzip + pad4 + extra.cpio.gzip = pad4.gzip
 4) main.cpio.lz4  + pad4 + extra.cpio.lz4 = pad4.lz4

The pad4 test-cases replicate the initrd load by grub, as it pads and
aligns every initrd it loads.

All of the above boot; however, /test-file was not accessible in the
initrd for testcase #4, as decoding in the lz4 decompressor failed.  An
error message was also printed, which is usually harmless.

With a patched kernel, all of the above testcases now pass and /test-file
is accessible.

This fixes the lz4 initrd decompression warning on every boot with grub,
and more importantly it fixes the inability to load multiple
lz4-compressed initrds with grub.  This patch has been shipping in Ubuntu
kernels since January 2021.

[1] ./Documentation/driver-api/early-userspace/buffer-format.rst

BugLink: https://bugs.launchpad.net/bugs/1835660
Link: https://lore.kernel.org/lkml/[email protected]/ # v0
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Dimitri John Ledkov <[email protected]>
Cc: Kyungsik Lee <[email protected]>
Cc: Yinghai Lu <[email protected]>
Cc: Bongkyu Kim <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Sven Schmidt <[email protected]>
Cc: Rajat Asthana <[email protected]>
Cc: Nick Terrell <[email protected]>
Cc: Gao Xiang <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
jnettlet pushed a commit that referenced this issue Jul 16, 2021
ASan reports a memory leak caused by the evlist not being deleted on exit
in perf-report, perf-script and perf-data.
The problem is caused by session->evlist not being deleted; it is
allocated in perf_session__read_header, which is called from
perf_session__new if perf_data is in read mode.
In the case of write mode, session->evlist is filled in by the caller.
This patch solves the problem by calling evlist__delete from
perf_session__delete if perf_data is in read mode.
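
A sketch of that change in tools/perf/util/session.c (assuming the
perf_data__is_read() helper, which is how read mode is normally tested
there; not the verbatim diff):

  void perf_session__delete(struct perf_session *session)
  {
  	if (session == NULL)
  		return;
  	/* ... existing teardown ... */
  	if (session->data) {
  		/*
  		 * In read mode the evlist was allocated by
  		 * perf_session__read_header(), so it is ours to free.
  		 */
  		if (perf_data__is_read(session->data))
  			evlist__delete(session->evlist);
  		perf_data__close(session->data);
  	}
  	free(session);
  }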

Changes in v2:
 - call evlist__delete from within perf_session__delete

v1: https://lore.kernel.org/lkml/[email protected]/

ASan report follows:

$ ./perf script report flamegraph
=================================================================
==227640==ERROR: LeakSanitizer: detected memory leaks

<SNIP unrelated>

Indirect leak of 2704 byte(s) in 1 object(s) allocated from:
    #0 0x4f4137 in calloc (/home/user/linux/tools/perf/perf+0x4f4137)
    #1 0xbe3d56 in zalloc /home/user/linux/tools/lib/perf/../../lib/zalloc.c:8:9
    #2 0x7f999e in evlist__new /home/user/linux/tools/perf/util/evlist.c:77:26
    #3 0x8ad938 in perf_session__read_header /home/user/linux/tools/perf/util/header.c:3797:20
    #4 0x8ec714 in perf_session__open /home/user/linux/tools/perf/util/session.c:109:6
    #5 0x8ebe83 in perf_session__new /home/user/linux/tools/perf/util/session.c:213:10
    #6 0x60c6de in cmd_script /home/user/linux/tools/perf/builtin-script.c:3856:12
    #7 0x7b2930 in run_builtin /home/user/linux/tools/perf/perf.c:313:11
    #8 0x7b120f in handle_internal_command /home/user/linux/tools/perf/perf.c:365:8
    #9 0x7b2493 in run_argv /home/user/linux/tools/perf/perf.c:409:2
    #10 0x7b0c89 in main /home/user/linux/tools/perf/perf.c:539:3
    #11 0x7f5260654b74  (/lib64/libc.so.6+0x27b74)

Indirect leak of 568 byte(s) in 1 object(s) allocated from:
    #0 0x4f4137 in calloc (/home/user/linux/tools/perf/perf+0x4f4137)
    #1 0xbe3d56 in zalloc /home/user/linux/tools/lib/perf/../../lib/zalloc.c:8:9
    #2 0x80ce88 in evsel__new_idx /home/user/linux/tools/perf/util/evsel.c:268:24
    #3 0x8aed93 in evsel__new /home/user/linux/tools/perf/util/evsel.h:210:9
    #4 0x8ae07e in perf_session__read_header /home/user/linux/tools/perf/util/header.c:3853:11
    #5 0x8ec714 in perf_session__open /home/user/linux/tools/perf/util/session.c:109:6
    #6 0x8ebe83 in perf_session__new /home/user/linux/tools/perf/util/session.c:213:10
    #7 0x60c6de in cmd_script /home/user/linux/tools/perf/builtin-script.c:3856:12
    #8 0x7b2930 in run_builtin /home/user/linux/tools/perf/perf.c:313:11
    #9 0x7b120f in handle_internal_command /home/user/linux/tools/perf/perf.c:365:8
    #10 0x7b2493 in run_argv /home/user/linux/tools/perf/perf.c:409:2
    #11 0x7b0c89 in main /home/user/linux/tools/perf/perf.c:539:3
    #12 0x7f5260654b74  (/lib64/libc.so.6+0x27b74)

Indirect leak of 264 byte(s) in 1 object(s) allocated from:
    #0 0x4f4137 in calloc (/home/user/linux/tools/perf/perf+0x4f4137)
    #1 0xbe3d56 in zalloc /home/user/linux/tools/lib/perf/../../lib/zalloc.c:8:9
    #2 0xbe3e70 in xyarray__new /home/user/linux/tools/lib/perf/xyarray.c:10:23
    #3 0xbd7754 in perf_evsel__alloc_id /home/user/linux/tools/lib/perf/evsel.c:361:21
    #4 0x8ae201 in perf_session__read_header /home/user/linux/tools/perf/util/header.c:3871:7
    #5 0x8ec714 in perf_session__open /home/user/linux/tools/perf/util/session.c:109:6
    #6 0x8ebe83 in perf_session__new /home/user/linux/tools/perf/util/session.c:213:10
    #7 0x60c6de in cmd_script /home/user/linux/tools/perf/builtin-script.c:3856:12
    #8 0x7b2930 in run_builtin /home/user/linux/tools/perf/perf.c:313:11
    #9 0x7b120f in handle_internal_command /home/user/linux/tools/perf/perf.c:365:8
    #10 0x7b2493 in run_argv /home/user/linux/tools/perf/perf.c:409:2
    #11 0x7b0c89 in main /home/user/linux/tools/perf/perf.c:539:3
    #12 0x7f5260654b74  (/lib64/libc.so.6+0x27b74)

Indirect leak of 32 byte(s) in 1 object(s) allocated from:
    #0 0x4f4137 in calloc (/home/user/linux/tools/perf/perf+0x4f4137)
    #1 0xbe3d56 in zalloc /home/user/linux/tools/lib/perf/../../lib/zalloc.c:8:9
    #2 0xbd77e0 in perf_evsel__alloc_id /home/user/linux/tools/lib/perf/evsel.c:365:14
    #3 0x8ae201 in perf_session__read_header /home/user/linux/tools/perf/util/header.c:3871:7
    #4 0x8ec714 in perf_session__open /home/user/linux/tools/perf/util/session.c:109:6
    #5 0x8ebe83 in perf_session__new /home/user/linux/tools/perf/util/session.c:213:10
    #6 0x60c6de in cmd_script /home/user/linux/tools/perf/builtin-script.c:3856:12
    #7 0x7b2930 in run_builtin /home/user/linux/tools/perf/perf.c:313:11
    #8 0x7b120f in handle_internal_command /home/user/linux/tools/perf/perf.c:365:8
    #9 0x7b2493 in run_argv /home/user/linux/tools/perf/perf.c:409:2
    #10 0x7b0c89 in main /home/user/linux/tools/perf/perf.c:539:3
    #11 0x7f5260654b74  (/lib64/libc.so.6+0x27b74)

Indirect leak of 7 byte(s) in 1 object(s) allocated from:
    #0 0x4b8207 in strdup (/home/user/linux/tools/perf/perf+0x4b8207)
    #1 0x8b4459 in evlist__set_event_name /home/user/linux/tools/perf/util/header.c:2292:16
    #2 0x89d862 in process_event_desc /home/user/linux/tools/perf/util/header.c:2313:3
    #3 0x8af319 in perf_file_section__process /home/user/linux/tools/perf/util/header.c:3651:9
    #4 0x8aa6e9 in perf_header__process_sections /home/user/linux/tools/perf/util/header.c:3427:9
    #5 0x8ae3e7 in perf_session__read_header /home/user/linux/tools/perf/util/header.c:3886:2
    #6 0x8ec714 in perf_session__open /home/user/linux/tools/perf/util/session.c:109:6
    #7 0x8ebe83 in perf_session__new /home/user/linux/tools/perf/util/session.c:213:10
    #8 0x60c6de in cmd_script /home/user/linux/tools/perf/builtin-script.c:3856:12
    #9 0x7b2930 in run_builtin /home/user/linux/tools/perf/perf.c:313:11
    #10 0x7b120f in handle_internal_command /home/user/linux/tools/perf/perf.c:365:8
    #11 0x7b2493 in run_argv /home/user/linux/tools/perf/perf.c:409:2
    #12 0x7b0c89 in main /home/user/linux/tools/perf/perf.c:539:3
    #13 0x7f5260654b74  (/lib64/libc.so.6+0x27b74)

SUMMARY: AddressSanitizer: 3728 byte(s) leaked in 7 allocation(s).

Signed-off-by: Riccardo Mancini <[email protected]>
Acked-by: Ian Rogers <[email protected]>
Acked-by: Jiri Olsa <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Ian Rogers <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Leo Yan <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lore.kernel.org/lkml/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
jnettlet pushed a commit that referenced this issue Jul 16, 2021
On trogdor devices, with lockdep enabled in the kernel, I see the
following lockdep splat when stopping YouTube.

 ======================================================
 WARNING: possible circular locking dependency detected
 5.13.0-rc2 #71 Not tainted
 ------------------------------------------------------
 ThreadPoolSingl/3969 is trying to acquire lock:
 ffffff80d4d5c080 (&inst->lock#3){+.+.}-{3:3}, at: vdec_buf_cleanup+0x3c/0x17c [venus_dec]

 but task is already holding lock:
 ffffff80d3c3c4f8 (&q->mmap_lock){+.+.}-{3:3}, at: vb2_core_reqbufs+0xe4/0x390 [videobuf2_common]

 which lock already depends on the new lock.

 the existing dependency chain (in reverse order) is:

 -> #5 (&q->mmap_lock){+.+.}-{3:3}:
        __mutex_lock_common+0xcc/0xb88
        mutex_lock_nested+0x5c/0x68
        vb2_mmap+0xf4/0x290 [videobuf2_common]
        v4l2_m2m_fop_mmap+0x44/0x50 [v4l2_mem2mem]
        v4l2_mmap+0x5c/0xa4
        mmap_region+0x310/0x5a4
        do_mmap+0x348/0x43c
        vm_mmap_pgoff+0xfc/0x178
        ksys_mmap_pgoff+0x84/0xfc
        __arm64_compat_sys_aarch32_mmap2+0x2c/0x38
        invoke_syscall+0x54/0x110
        el0_svc_common+0x88/0xf0
        do_el0_svc_compat+0x28/0x34
        el0_svc_compat+0x24/0x34
        el0_sync_compat_handler+0xc0/0xf0
        el0_sync_compat+0x19c/0x1c0

 -> #4 (&mm->mmap_lock){++++}-{3:3}:
        __might_fault+0x60/0x88
        filldir64+0x124/0x3a0
        dcache_readdir+0x7c/0x1ec
        iterate_dir+0xc4/0x184
        __arm64_sys_getdents64+0x78/0x170
        invoke_syscall+0x54/0x110
        el0_svc_common+0xa8/0xf0
        do_el0_svc_compat+0x28/0x34
        el0_svc_compat+0x24/0x34
        el0_sync_compat_handler+0xc0/0xf0
        el0_sync_compat+0x19c/0x1c0

 -> #3 (&sb->s_type->i_mutex_key#3){++++}-{3:3}:
        down_write+0x94/0x1f4
        start_creating+0xb0/0x174
        debugfs_create_dir+0x28/0x138
        opp_debug_register+0x88/0xc0
        _add_opp_dev+0x84/0x9c
        _add_opp_table_indexed+0x16c/0x310
        _of_add_table_indexed+0x70/0xb5c
        dev_pm_opp_of_add_table_indexed+0x20/0x2c
        of_genpd_add_provider_onecell+0xc4/0x1c8
        rpmhpd_probe+0x21c/0x278
        platform_probe+0xb4/0xd4
        really_probe+0x140/0x35c
        driver_probe_device+0x90/0xcc
        __device_attach_driver+0xa4/0xc0
        bus_for_each_drv+0x8c/0xd8
        __device_attach+0xc4/0x150
        device_initial_probe+0x20/0x2c
        bus_probe_device+0x40/0xa4
        device_add+0x22c/0x3fc
        of_device_add+0x44/0x54
        of_platform_device_create_pdata+0xb0/0xf4
        of_platform_bus_create+0x1d0/0x350
        of_platform_populate+0x80/0xd4
        devm_of_platform_populate+0x64/0xb0
        rpmh_rsc_probe+0x378/0x3dc
        platform_probe+0xb4/0xd4
        really_probe+0x140/0x35c
        driver_probe_device+0x90/0xcc
        __device_attach_driver+0xa4/0xc0
        bus_for_each_drv+0x8c/0xd8
        __device_attach+0xc4/0x150
        device_initial_probe+0x20/0x2c
        bus_probe_device+0x40/0xa4
        device_add+0x22c/0x3fc
        of_device_add+0x44/0x54
        of_platform_device_create_pdata+0xb0/0xf4
        of_platform_bus_create+0x1d0/0x350
        of_platform_bus_create+0x21c/0x350
        of_platform_populate+0x80/0xd4
        of_platform_default_populate_init+0xb8/0xd4
        do_one_initcall+0x1b4/0x400
        do_initcall_level+0xa8/0xc8
        do_initcalls+0x5c/0x9c
        do_basic_setup+0x2c/0x38
        kernel_init_freeable+0x1a4/0x1ec
        kernel_init+0x20/0x118
        ret_from_fork+0x10/0x30

 -> #2 (gpd_list_lock){+.+.}-{3:3}:
        __mutex_lock_common+0xcc/0xb88
        mutex_lock_nested+0x5c/0x68
        __genpd_dev_pm_attach+0x70/0x18c
        genpd_dev_pm_attach_by_id+0xe4/0x158
        genpd_dev_pm_attach_by_name+0x48/0x60
        dev_pm_domain_attach_by_name+0x2c/0x38
        dev_pm_opp_attach_genpd+0xac/0x160
        vcodec_domains_get+0x94/0x14c [venus_core]
        core_get_v4+0x150/0x188 [venus_core]
        venus_probe+0x138/0x444 [venus_core]
        platform_probe+0xb4/0xd4
        really_probe+0x140/0x35c
        driver_probe_device+0x90/0xcc
        device_driver_attach+0x58/0x7c
        __driver_attach+0xc8/0xe0
        bus_for_each_dev+0x88/0xd4
        driver_attach+0x30/0x3c
        bus_add_driver+0x10c/0x1e0
        driver_register+0x70/0x108
        __platform_driver_register+0x30/0x3c
        0xffffffde113e1044
        do_one_initcall+0x1b4/0x400
        do_init_module+0x64/0x1fc
        load_module+0x17f4/0x1958
        __arm64_sys_finit_module+0xb4/0xf0
        invoke_syscall+0x54/0x110
        el0_svc_common+0x88/0xf0
        do_el0_svc_compat+0x28/0x34
        el0_svc_compat+0x24/0x34
        el0_sync_compat_handler+0xc0/0xf0
        el0_sync_compat+0x19c/0x1c0

 -> #1 (&opp_table->genpd_virt_dev_lock){+.+.}-{3:3}:
        __mutex_lock_common+0xcc/0xb88
        mutex_lock_nested+0x5c/0x68
        _set_required_opps+0x74/0x120
        _set_opp+0x94/0x37c
        dev_pm_opp_set_rate+0xa0/0x194
        core_clks_set_rate+0x28/0x58 [venus_core]
        load_scale_v4+0x228/0x2b4 [venus_core]
        session_process_buf+0x160/0x198 [venus_core]
        venus_helper_vb2_buf_queue+0xcc/0x130 [venus_core]
        vdec_vb2_buf_queue+0xc4/0x140 [venus_dec]
        __enqueue_in_driver+0x164/0x188 [videobuf2_common]
        vb2_core_qbuf+0x13c/0x47c [videobuf2_common]
        vb2_qbuf+0x88/0xec [videobuf2_v4l2]
        v4l2_m2m_qbuf+0x84/0x15c [v4l2_mem2mem]
        v4l2_m2m_ioctl_qbuf+0x24/0x30 [v4l2_mem2mem]
        v4l_qbuf+0x54/0x68
        __video_do_ioctl+0x2bc/0x3bc
        video_usercopy+0x558/0xb04
        video_ioctl2+0x24/0x30
        v4l2_ioctl+0x58/0x68
        v4l2_compat_ioctl32+0x84/0xa0
        __arm64_compat_sys_ioctl+0x12c/0x140
        invoke_syscall+0x54/0x110
        el0_svc_common+0x88/0xf0
        do_el0_svc_compat+0x28/0x34
        el0_svc_compat+0x24/0x34
        el0_sync_compat_handler+0xc0/0xf0
        el0_sync_compat+0x19c/0x1c0

 -> #0 (&inst->lock#3){+.+.}-{3:3}:
        __lock_acquire+0x248c/0x2d6c
        lock_acquire+0x240/0x314
        __mutex_lock_common+0xcc/0xb88
        mutex_lock_nested+0x5c/0x68
        vdec_buf_cleanup+0x3c/0x17c [venus_dec]
        __vb2_queue_free+0x98/0x204 [videobuf2_common]
        vb2_core_reqbufs+0x14c/0x390 [videobuf2_common]
        vb2_reqbufs+0x58/0x74 [videobuf2_v4l2]
        v4l2_m2m_reqbufs+0x58/0x90 [v4l2_mem2mem]
        v4l2_m2m_ioctl_reqbufs+0x24/0x30 [v4l2_mem2mem]
        v4l_reqbufs+0x58/0x6c
        __video_do_ioctl+0x2bc/0x3bc
        video_usercopy+0x558/0xb04
        video_ioctl2+0x24/0x30
        v4l2_ioctl+0x58/0x68
        v4l2_compat_ioctl32+0x84/0xa0
        __arm64_compat_sys_ioctl+0x12c/0x140
        invoke_syscall+0x54/0x110
        el0_svc_common+0x88/0xf0
        do_el0_svc_compat+0x28/0x34
        el0_svc_compat+0x24/0x34
        el0_sync_compat_handler+0xc0/0xf0
        el0_sync_compat+0x19c/0x1c0

 other info that might help us debug this:

 Chain exists of:
   &inst->lock#3 --> &mm->mmap_lock --> &q->mmap_lock

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(&q->mmap_lock);
                                lock(&mm->mmap_lock);
                                lock(&q->mmap_lock);
   lock(&inst->lock#3);

  *** DEADLOCK ***

 1 lock held by ThreadPoolSingl/3969:
  #0: ffffff80d3c3c4f8 (&q->mmap_lock){+.+.}-{3:3}, at: vb2_core_reqbufs+0xe4/0x390 [videobuf2_common]

 stack backtrace:
 CPU: 2 PID: 3969 Comm: ThreadPoolSingl Not tainted 5.13.0-rc2 #71
 Hardware name: Google Lazor (rev3+) with KB Backlight (DT)
 Call trace:
  dump_backtrace+0x0/0x1b4
  show_stack+0x24/0x30
  dump_stack+0xe0/0x15c
  print_circular_bug+0x32c/0x388
  check_noncircular+0x138/0x140
  __lock_acquire+0x248c/0x2d6c
  lock_acquire+0x240/0x314
  __mutex_lock_common+0xcc/0xb88
  mutex_lock_nested+0x5c/0x68
  vdec_buf_cleanup+0x3c/0x17c [venus_dec]
  __vb2_queue_free+0x98/0x204 [videobuf2_common]
  vb2_core_reqbufs+0x14c/0x390 [videobuf2_common]
  vb2_reqbufs+0x58/0x74 [videobuf2_v4l2]
  v4l2_m2m_reqbufs+0x58/0x90 [v4l2_mem2mem]
  v4l2_m2m_ioctl_reqbufs+0x24/0x30 [v4l2_mem2mem]
  v4l_reqbufs+0x58/0x6c
  __video_do_ioctl+0x2bc/0x3bc
  video_usercopy+0x558/0xb04
  video_ioctl2+0x24/0x30
  v4l2_ioctl+0x58/0x68
  v4l2_compat_ioctl32+0x84/0xa0
  __arm64_compat_sys_ioctl+0x12c/0x140
  invoke_syscall+0x54/0x110
  el0_svc_common+0x88/0xf0
  do_el0_svc_compat+0x28/0x34
  el0_svc_compat+0x24/0x34
  el0_sync_compat_handler+0xc0/0xf0
  el0_sync_compat+0x19c/0x1c0

The 'gpd_list_lock' is nominally named as such to protect the 'gpd_list'
from concurrent access and mutation. Unfortunately, holding that mutex
around various OPP framework calls leads to lockdep splats because now
we're doing various operations in OPP core such as registering with
debugfs while holding the list lock. We don't need to hold any list
mutex while we're calling into OPP, so let's shrink the locking area of
the 'gpd_list_lock' so that lockdep isn't triggered. This also helps
reduce contention on this lock, which probably doesn't matter much but
at least is nice to have.
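
The resulting pattern, as a hedged sketch (hypothetical helper, not the
exact upstream diff): hold gpd_list_lock only for the list walk itself and
release it before calling into OPP, debugfs or anything else that takes
its own locks.

  /* drivers/base/power/domain.c -- illustrative pattern only */
  static struct generic_pm_domain *genpd_find_by_name(const char *name)
  {
  	struct generic_pm_domain *genpd, *found = NULL;

  	mutex_lock(&gpd_list_lock);
  	list_for_each_entry(genpd, &gpd_list, gpd_list_node) {
  		if (!strcmp(genpd->name, name)) {
  			found = genpd;
  			break;
  		}
  	}
  	mutex_unlock(&gpd_list_lock);	/* dropped before any OPP work */

  	return found;
  }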

Cc: Len Brown <[email protected]>
Cc: Pavel Machek <[email protected]>
Cc: Greg Kroah-Hartman <[email protected]>
Cc: <[email protected]>
Cc: Viresh Kumar <[email protected]>
Signed-off-by: Stephen Boyd <[email protected]>
Reviewed-by: Ulf Hansson <[email protected]>
Signed-off-by: Rafael J. Wysocki <[email protected]>
jnettlet pushed a commit that referenced this issue Jul 16, 2021
ASan reports a heap-buffer-overflow in elf_sec__is_text when using perf-top.

The bug is caused by the fact that secstrs is built from runtime_ss, while
shdr is built from syms_ss if shdr.sh_type != SHT_NOBITS. Therefore, they
point to two different ELF files.

This patch renames secstrs to secstrs_run and adds secstrs_sym, so that
the correct secstrs is chosen depending on shdr.sh_type.
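
In code form the idea is roughly the following (a fragment sketched from
the description above, not the verbatim patch): keep one section-string
table per ELF image and look section names up in the table that belongs to
the image the section header was actually read from.

  	Elf_Data *secstrs_run;	/* section names of runtime_ss->elf */
  	Elf_Data *secstrs_sym;	/* section names of syms_ss->elf    */

  	secstrs_run = elf_getdata(elf_getscn(runtime_ss->elf,
  					     runtime_ss->ehdr.e_shstrndx), NULL);
  	secstrs_sym = elf_getdata(elf_getscn(syms_ss->elf,
  					     syms_ss->ehdr.e_shstrndx), NULL);

  	/*
  	 * Then pass secstrs_sym to elf_sec__filter()/elf_sec__is_text()
  	 * wherever the section header in hand was read from syms_ss, and
  	 * secstrs_run wherever it was read from runtime_ss.
  	 */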

  $ ASAN_OPTIONS=abort_on_error=1:disable_coredump=0:unmap_shadow_on_exit=1 ./perf top
  =================================================================
  ==363148==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x61300009add6 at pc 0x00000049875c bp 0x7f4f56446440 sp 0x7f4f56445bf0
  READ of size 1 at 0x61300009add6 thread T6
    #0 0x49875b in StrstrCheck(void*, char*, char const*, char const*) (/home/user/linux/tools/perf/perf+0x49875b)
    #1 0x4d13a2 in strstr (/home/user/linux/tools/perf/perf+0x4d13a2)
    #2 0xacae36 in elf_sec__is_text /home/user/linux/tools/perf/util/symbol-elf.c:176:9
    #3 0xac3ec9 in elf_sec__filter /home/user/linux/tools/perf/util/symbol-elf.c:187:9
    #4 0xac2c3d in dso__load_sym /home/user/linux/tools/perf/util/symbol-elf.c:1254:20
    #5 0x883981 in dso__load /home/user/linux/tools/perf/util/symbol.c:1897:9
    #6 0x8e6248 in map__load /home/user/linux/tools/perf/util/map.c:332:7
    #7 0x8e66e5 in map__find_symbol /home/user/linux/tools/perf/util/map.c:366:6
    #8 0x7f8278 in machine__resolve /home/user/linux/tools/perf/util/event.c:707:13
    #9 0x5f3d1a in perf_event__process_sample /home/user/linux/tools/perf/builtin-top.c:773:6
    #10 0x5f30e4 in deliver_event /home/user/linux/tools/perf/builtin-top.c:1197:3
    #11 0x908a72 in do_flush /home/user/linux/tools/perf/util/ordered-events.c:244:9
    #12 0x905fae in __ordered_events__flush /home/user/linux/tools/perf/util/ordered-events.c:323:8
    #13 0x9058db in ordered_events__flush /home/user/linux/tools/perf/util/ordered-events.c:341:9
    #14 0x5f19b1 in process_thread /home/user/linux/tools/perf/builtin-top.c:1109:7
    #15 0x7f4f6a21a298 in start_thread /usr/src/debug/glibc-2.33-16.fc34.x86_64/nptl/pthread_create.c:481:8
    #16 0x7f4f697d0352 in clone ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

0x61300009add6 is located 10 bytes to the right of 332-byte region [0x61300009ac80,0x61300009adcc)
allocated by thread T6 here:

    #0 0x4f3f7f in malloc (/home/user/linux/tools/perf/perf+0x4f3f7f)
    #1 0x7f4f6a0a88d9  (/lib64/libelf.so.1+0xa8d9)

Thread T6 created by T0 here:

    #0 0x464856 in pthread_create (/home/user/linux/tools/perf/perf+0x464856)
    #1 0x5f06e0 in __cmd_top /home/user/linux/tools/perf/builtin-top.c:1309:6
    #2 0x5ef19f in cmd_top /home/user/linux/tools/perf/builtin-top.c:1762:11
    #3 0x7b28c0 in run_builtin /home/user/linux/tools/perf/perf.c:313:11
    #4 0x7b119f in handle_internal_command /home/user/linux/tools/perf/perf.c:365:8
    #5 0x7b2423 in run_argv /home/user/linux/tools/perf/perf.c:409:2
    #6 0x7b0c19 in main /home/user/linux/tools/perf/perf.c:539:3
    #7 0x7f4f696f7b74 in __libc_start_main /usr/src/debug/glibc-2.33-16.fc34.x86_64/csu/../csu/libc-start.c:332:16

  SUMMARY: AddressSanitizer: heap-buffer-overflow (/home/user/linux/tools/perf/perf+0x49875b) in StrstrCheck(void*, char*, char const*, char const*)
  Shadow bytes around the buggy address:
    0x0c268000b560: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
    0x0c268000b570: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
    0x0c268000b580: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
    0x0c268000b590: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
    0x0c268000b5a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  =>0x0c268000b5b0: 00 00 00 00 00 00 00 00 00 04[fa]fa fa fa fa fa
    0x0c268000b5c0: fa fa fa fa fa fa fa fa 00 00 00 00 00 00 00 00
    0x0c268000b5d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
    0x0c268000b5e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
    0x0c268000b5f0: 07 fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
    0x0c268000b600: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
  Shadow byte legend (one shadow byte represents 8 application bytes):
    Addressable:           00
    Partially addressable: 01 02 03 04 05 06 07
    Heap left redzone:       fa
    Freed heap region:       fd
    Stack left redzone:      f1
    Stack mid redzone:       f2
    Stack right redzone:     f3
    Stack after return:      f5
    Stack use after scope:   f8
    Global redzone:          f9
    Global init order:       f6
    Poisoned by user:        f7
    Container overflow:      fc
    Array cookie:            ac
    Intra object redzone:    bb
    ASan internal:           fe
    Left alloca redzone:     ca
    Right alloca redzone:    cb
    Shadow gap:              cc
  ==363148==ABORTING

Suggested-by: Jiri Slaby <[email protected]>
Signed-off-by: Riccardo Mancini <[email protected]>
Acked-by: Namhyung Kim <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Fabian Hemmer <[email protected]>
Cc: Ian Rogers <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Jiri Slaby <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Remi Bernon <[email protected]>
Link: http://lore.kernel.org/lkml/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
jnettlet pushed a commit that referenced this issue Dec 10, 2021
[ Upstream commit 8ef9dc0 ]

We got the following lockdep splat while running fstests (specifically
btrfs/003 and btrfs/020 in a row) with the new rc.  This was uncovered
by 87579e9 ("loop: use worker per cgroup instead of kworker") which
converted loop to using workqueues, which comes with lockdep
annotations that don't exist with kworkers.  The lockdep splat is as
follows:

  WARNING: possible circular locking dependency detected
  5.14.0-rc2-custom+ #34 Not tainted
  ------------------------------------------------------
  losetup/156417 is trying to acquire lock:
  ffff9c7645b02d38 ((wq_completion)loop0){+.+.}-{0:0}, at: flush_workqueue+0x84/0x600

  but task is already holding lock:
  ffff9c7647395468 (&lo->lo_mutex){+.+.}-{3:3}, at: __loop_clr_fd+0x41/0x650 [loop]

  which lock already depends on the new lock.

  the existing dependency chain (in reverse order) is:

  -> #5 (&lo->lo_mutex){+.+.}-{3:3}:
	 __mutex_lock+0xba/0x7c0
	 lo_open+0x28/0x60 [loop]
	 blkdev_get_whole+0x28/0xf0
	 blkdev_get_by_dev.part.0+0x168/0x3c0
	 blkdev_open+0xd2/0xe0
	 do_dentry_open+0x163/0x3a0
	 path_openat+0x74d/0xa40
	 do_filp_open+0x9c/0x140
	 do_sys_openat2+0xb1/0x170
	 __x64_sys_openat+0x54/0x90
	 do_syscall_64+0x3b/0x90
	 entry_SYSCALL_64_after_hwframe+0x44/0xae

  -> #4 (&disk->open_mutex){+.+.}-{3:3}:
	 __mutex_lock+0xba/0x7c0
	 blkdev_get_by_dev.part.0+0xd1/0x3c0
	 blkdev_get_by_path+0xc0/0xd0
	 btrfs_scan_one_device+0x52/0x1f0 [btrfs]
	 btrfs_control_ioctl+0xac/0x170 [btrfs]
	 __x64_sys_ioctl+0x83/0xb0
	 do_syscall_64+0x3b/0x90
	 entry_SYSCALL_64_after_hwframe+0x44/0xae

  -> #3 (uuid_mutex){+.+.}-{3:3}:
	 __mutex_lock+0xba/0x7c0
	 btrfs_rm_device+0x48/0x6a0 [btrfs]
	 btrfs_ioctl+0x2d1c/0x3110 [btrfs]
	 __x64_sys_ioctl+0x83/0xb0
	 do_syscall_64+0x3b/0x90
	 entry_SYSCALL_64_after_hwframe+0x44/0xae

  -> #2 (sb_writers#11){.+.+}-{0:0}:
	 lo_write_bvec+0x112/0x290 [loop]
	 loop_process_work+0x25f/0xcb0 [loop]
	 process_one_work+0x28f/0x5d0
	 worker_thread+0x55/0x3c0
	 kthread+0x140/0x170
	 ret_from_fork+0x22/0x30

  -> #1 ((work_completion)(&lo->rootcg_work)){+.+.}-{0:0}:
	 process_one_work+0x266/0x5d0
	 worker_thread+0x55/0x3c0
	 kthread+0x140/0x170
	 ret_from_fork+0x22/0x30

  -> #0 ((wq_completion)loop0){+.+.}-{0:0}:
	 __lock_acquire+0x1130/0x1dc0
	 lock_acquire+0xf5/0x320
	 flush_workqueue+0xae/0x600
	 drain_workqueue+0xa0/0x110
	 destroy_workqueue+0x36/0x250
	 __loop_clr_fd+0x9a/0x650 [loop]
	 lo_ioctl+0x29d/0x780 [loop]
	 block_ioctl+0x3f/0x50
	 __x64_sys_ioctl+0x83/0xb0
	 do_syscall_64+0x3b/0x90
	 entry_SYSCALL_64_after_hwframe+0x44/0xae

  other info that might help us debug this:
  Chain exists of:
    (wq_completion)loop0 --> &disk->open_mutex --> &lo->lo_mutex
   Possible unsafe locking scenario:
	 CPU0                    CPU1
	 ----                    ----
    lock(&lo->lo_mutex);
				 lock(&disk->open_mutex);
				 lock(&lo->lo_mutex);
    lock((wq_completion)loop0);

   *** DEADLOCK ***
  1 lock held by losetup/156417:
   #0: ffff9c7647395468 (&lo->lo_mutex){+.+.}-{3:3}, at: __loop_clr_fd+0x41/0x650 [loop]

  stack backtrace:
  CPU: 8 PID: 156417 Comm: losetup Not tainted 5.14.0-rc2-custom+ #34
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
  Call Trace:
   dump_stack_lvl+0x57/0x72
   check_noncircular+0x10a/0x120
   __lock_acquire+0x1130/0x1dc0
   lock_acquire+0xf5/0x320
   ? flush_workqueue+0x84/0x600
   flush_workqueue+0xae/0x600
   ? flush_workqueue+0x84/0x600
   drain_workqueue+0xa0/0x110
   destroy_workqueue+0x36/0x250
   __loop_clr_fd+0x9a/0x650 [loop]
   lo_ioctl+0x29d/0x780 [loop]
   ? __lock_acquire+0x3a0/0x1dc0
   ? update_dl_rq_load_avg+0x152/0x360
   ? lock_is_held_type+0xa5/0x120
   ? find_held_lock.constprop.0+0x2b/0x80
   block_ioctl+0x3f/0x50
   __x64_sys_ioctl+0x83/0xb0
   do_syscall_64+0x3b/0x90
   entry_SYSCALL_64_after_hwframe+0x44/0xae
  RIP: 0033:0x7f645884de6b

Usually the uuid_mutex exists to protect the fs_devices that map
together all of the devices that match a specific uuid.  In rm_device
we're messing with the uuid of a device, so it makes sense to protect
that here.

However, in doing that it pulls in a whole host of lockdep dependencies:
we call mnt_may_write() on the sb before we grab the uuid_mutex, so we end
up with the dependency chain under the uuid_mutex being added under the
normal sb write dependency chain, which causes problems with loop devices.

We don't need the uuid mutex here however.  If we call
btrfs_scan_one_device() before we scratch the super block we will find
the fs_devices and not find the device itself and return EBUSY because
the fs_devices is open.  If we call it after the scratch happens it will
not appear to be a valid btrfs file system.

We do not need to worry about other fs_devices-modifying operations here
because we're protected by the exclusive operations locking.

So drop the uuid_mutex here in order to fix the lockdep splat.

A more detailed explanation from the discussion:

We are worried about rm and scan racing with each other.  Before this
change we would zero the device out under the UUID mutex, so when scan did
run it could go through the whole device scan without rm messing with us.

We aren't worried if the scratch happens first, because the result is we
don't think this is a btrfs device and we bail out.

The only case we are concerned with is when we scratch _after_ scan is
able to read the superblock and gets a seemingly valid super block, so
let's consider this case.

Scan will call device_list_add() with the device we're removing.  We'll
call find_fsid_with_metadata_uuid() and get our fs_devices for this
UUID.  At this point we lock the fs_devices->device_list_mutex.  This is
what protects us in this case, but we have two cases here.

1. We aren't at the device removal part of the RM yet.  We found our
   device, and the device name matches our path, so we go down and set
   total_devices to our superblock's number of devices, which doesn't
   affect anything because we haven't done the remove yet.

2. We are past the device removal part, which is protected by the
   device_list_mutex.  Scan doesn't find the device, it goes down and
   does the

   if (fs_devices->opened)
	   return -EBUSY;

   check and we bail out.

Nothing about this situation is ideal, but the lockdep splat is real, and
the fix is safe, though admittedly a bit scary looking.

Reviewed-by: Anand Jain <[email protected]>
Signed-off-by: Josef Bacik <[email protected]>
Reviewed-by: David Sterba <[email protected]>
[ copy more from the discussion ]
Signed-off-by: David Sterba <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
jnettlet pushed a commit that referenced this issue Dec 10, 2021
[ Upstream commit f0caea8 ]

Olga reports seeing the following Oops when doing O_DIRECT writes to a
pNFS flexfiles server:

Oops: 0000 [#1] SMP PTI
CPU: 1 PID: 234186 Comm: kworker/u8:1 Not tainted 5.15.0-rc4+ #4
Hardware name: Red Hat KVM/RHEL-AV, BIOS 1.13.0-2.module+el8.3.0+7353+9de0a3cc 04/01/2014
Workqueue: nfsiod rpc_async_release [sunrpc]
RIP: 0010:nfs_mark_request_commit+0x12/0x30 [nfs]
Code: ff ff be 03 00 00 00 e8 ac 34 83 eb e9 29 ff ff
ff e8 22 bc d7 eb 66 90 0f 1f 44 00 00 48 85 f6 74 16 48 8b 42 10 48
8b 40 18 <48> 8b 40 18 48 85 c0 74 05 e9 70 fc 15 ec 48 89 d6 e9 68 ed
ff ff
RSP: 0018:ffffa82f0159fe00 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ffff8f3393141880 RCX: 0000000000000000
RDX: ffffa82f0159fe08 RSI: ffff8f3381252500 RDI: ffff8f3393141880
RBP: ffff8f33ac317c00 R08: 0000000000000000 R09: ffff8f3487724cb0
R10: 0000000000000008 R11: 0000000000000001 R12: 0000000000000001
R13: ffff8f3485bccee0 R14: ffff8f33ac317c10 R15: ffff8f33ac317cd8
FS:  0000000000000000(0000) GS:ffff8f34fbc80000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000018 CR3: 0000000122120006 CR4: 0000000000770ee0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
PKRU: 55555554
Call Trace:
 nfs_direct_write_completion+0x13b/0x250 [nfs]
 rpc_free_task+0x39/0x60 [sunrpc]
 rpc_async_release+0x29/0x40 [sunrpc]
 process_one_work+0x1ce/0x370
 worker_thread+0x30/0x380
 ? process_one_work+0x370/0x370
 kthread+0x11a/0x140
 ? set_kthread_struct+0x40/0x40
 ret_from_fork+0x22/0x30

Reported-by: Olga Kornievskaia <[email protected]>
Fixes: 9c455a8 ("NFS/pNFS: Clean up pNFS commit operations")
Signed-off-by: Trond Myklebust <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
jnettlet pushed a commit that referenced this issue Dec 10, 2021
[ Upstream commit 5ec0a6f ]

The host crashes when pci_enable_atomic_ops_to_root() is called for VFs
with virtual buses.  The virtual buses added for SR-IOV have bus->self set
to NULL, and the host crashes because of this.

  PID: 4481   TASK: ffff89c6941b0000  CPU: 53  COMMAND: "bash"
  ...
   #3 [ffff9a9481713808] oops_end at ffffffffb9025cd6
   #4 [ffff9a9481713828] page_fault_oops at ffffffffb906e417
   #5 [ffff9a9481713888] exc_page_fault at ffffffffb9a0ad14
   #6 [ffff9a94817138b0] asm_exc_page_fault at ffffffffb9c00ace
      [exception RIP: pcie_capability_read_dword+28]
      RIP: ffffffffb952fd5c  RSP: ffff9a9481713960  RFLAGS: 00010246
      RAX: 0000000000000001  RBX: ffff89c6b1096000  RCX: 0000000000000000
      RDX: ffff9a9481713990  RSI: 0000000000000024  RDI: 0000000000000000
      RBP: 0000000000000080   R8: 0000000000000008   R9: ffff89c64341a2f8
      R10: 0000000000000002  R11: 0000000000000000  R12: ffff89c648bab000
      R13: 0000000000000000  R14: 0000000000000000  R15: ffff89c648bab0c8
      ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
   #7 [ffff9a9481713988] pci_enable_atomic_ops_to_root at ffffffffb95359a6
   #8 [ffff9a94817139c0] bnxt_qplib_determine_atomics at ffffffffc08c1a33 [bnxt_re]
   #9 [ffff9a94817139d0] bnxt_re_dev_init at ffffffffc08ba2d1 [bnxt_re]

Per PCIe r5.0, sec 9.3.5.10, the AtomicOp Requester Enable bit in Device
Control 2 is reserved for VFs.  The PF value applies to all associated VFs.

Return -EINVAL if pci_enable_atomic_ops_to_root() is called for a VF.
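
The check this implies is small; a sketch of the early return at the top
of pci_enable_atomic_ops_to_root(), relying on the is_virtfn flag in
struct pci_dev (sketch, not the verbatim diff):

  int pci_enable_atomic_ops_to_root(struct pci_dev *dev, u32 cap_mask)
  {
  	/*
  	 * Per PCIe r5.0, sec 9.3.5.10, the AtomicOp Requester Enable bit
  	 * in Device Control 2 is reserved for VFs; the PF value applies to
  	 * all associated VFs.  Refuse the request up front instead of
  	 * walking a virtual bus whose bus->self is NULL.
  	 */
  	if (dev->is_virtfn)
  		return -EINVAL;

  	/* ... existing walk up the bus hierarchy unchanged ... */
  }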

Link: https://lore.kernel.org/r/[email protected]
Fixes: 35f5ace ("RDMA/bnxt_re: Enable global atomic ops if platform supports")
Fixes: 430a236 ("PCI: Add pci_enable_atomic_ops_to_root()")
Signed-off-by: Selvin Xavier <[email protected]>
Signed-off-by: Bjorn Helgaas <[email protected]>
Reviewed-by: Andy Gospodarek <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
jnettlet pushed a commit that referenced this issue Dec 10, 2021
commit 2aa3660 upstream.

It is generally unsafe to call put_device() with dpm_list_mtx held,
because the given device's release routine may carry out an action that
depends on that lock and then deadlock.  So modify the system-wide suspend
and resume of devices to always drop dpm_list_mtx before calling
put_device() (and adjust white space somewhat while at it).
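
The resulting pattern in drivers/base/power/main.c, sketched for one of
the resume loops (illustrative of the locking order only, not the verbatim
diff):

  	mutex_lock(&dpm_list_mtx);
  	while (!list_empty(&dpm_suspended_list)) {
  		struct device *dev = to_device(dpm_suspended_list.next);

  		get_device(dev);
  		/* ... move dev to the next list while still locked ... */

  		mutex_unlock(&dpm_list_mtx);

  		/*
  		 * Run the callbacks and drop the reference without the list
  		 * lock held, so a release routine that itself needs
  		 * dpm_list_mtx cannot deadlock against us.
  		 */
  		device_resume(dev, state, false);
  		put_device(dev);

  		mutex_lock(&dpm_list_mtx);
  	}
  	mutex_unlock(&dpm_list_mtx);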

For instance, this prevents the following splat from showing up in
the kernel log after a system resume in certain configurations:

[ 3290.969514] ======================================================
[ 3290.969517] WARNING: possible circular locking dependency detected
[ 3290.969519] 5.15.0+ #2420 Tainted: G S
[ 3290.969523] ------------------------------------------------------
[ 3290.969525] systemd-sleep/4553 is trying to acquire lock:
[ 3290.969529] ffff888117ab1138 ((wq_completion)hci0#2){+.+.}-{0:0}, at: flush_workqueue+0x87/0x4a0
[ 3290.969554]
               but task is already holding lock:
[ 3290.969556] ffffffff8280fca8 (dpm_list_mtx){+.+.}-{3:3}, at: dpm_resume+0x12e/0x3e0
[ 3290.969571]
               which lock already depends on the new lock.

[ 3290.969573]
               the existing dependency chain (in reverse order) is:
[ 3290.969575]
               -> #3 (dpm_list_mtx){+.+.}-{3:3}:
[ 3290.969583]        __mutex_lock+0x9d/0xa30
[ 3290.969591]        device_pm_add+0x2e/0xe0
[ 3290.969597]        device_add+0x4d5/0x8f0
[ 3290.969605]        hci_conn_add_sysfs+0x43/0xb0 [bluetooth]
[ 3290.969689]        hci_conn_complete_evt.isra.71+0x124/0x750 [bluetooth]
[ 3290.969747]        hci_event_packet+0xd6c/0x28a0 [bluetooth]
[ 3290.969798]        hci_rx_work+0x213/0x640 [bluetooth]
[ 3290.969842]        process_one_work+0x2aa/0x650
[ 3290.969851]        worker_thread+0x39/0x400
[ 3290.969859]        kthread+0x142/0x170
[ 3290.969865]        ret_from_fork+0x22/0x30
[ 3290.969872]
               -> #2 (&hdev->lock){+.+.}-{3:3}:
[ 3290.969881]        __mutex_lock+0x9d/0xa30
[ 3290.969887]        hci_event_packet+0xba/0x28a0 [bluetooth]
[ 3290.969935]        hci_rx_work+0x213/0x640 [bluetooth]
[ 3290.969978]        process_one_work+0x2aa/0x650
[ 3290.969985]        worker_thread+0x39/0x400
[ 3290.969993]        kthread+0x142/0x170
[ 3290.969999]        ret_from_fork+0x22/0x30
[ 3290.970004]
               -> #1 ((work_completion)(&hdev->rx_work)){+.+.}-{0:0}:
[ 3290.970013]        process_one_work+0x27d/0x650
[ 3290.970020]        worker_thread+0x39/0x400
[ 3290.970028]        kthread+0x142/0x170
[ 3290.970033]        ret_from_fork+0x22/0x30
[ 3290.970038]
               -> #0 ((wq_completion)hci0#2){+.+.}-{0:0}:
[ 3290.970047]        __lock_acquire+0x15cb/0x1b50
[ 3290.970054]        lock_acquire+0x26c/0x300
[ 3290.970059]        flush_workqueue+0xae/0x4a0
[ 3290.970066]        drain_workqueue+0xa1/0x130
[ 3290.970073]        destroy_workqueue+0x34/0x1f0
[ 3290.970081]        hci_release_dev+0x49/0x180 [bluetooth]
[ 3290.970130]        bt_host_release+0x1d/0x30 [bluetooth]
[ 3290.970195]        device_release+0x33/0x90
[ 3290.970201]        kobject_release+0x63/0x160
[ 3290.970211]        dpm_resume+0x164/0x3e0
[ 3290.970215]        dpm_resume_end+0xd/0x20
[ 3290.970220]        suspend_devices_and_enter+0x1a4/0xba0
[ 3290.970229]        pm_suspend+0x26b/0x310
[ 3290.970236]        state_store+0x42/0x90
[ 3290.970243]        kernfs_fop_write_iter+0x135/0x1b0
[ 3290.970251]        new_sync_write+0x125/0x1c0
[ 3290.970257]        vfs_write+0x360/0x3c0
[ 3290.970263]        ksys_write+0xa7/0xe0
[ 3290.970269]        do_syscall_64+0x3a/0x80
[ 3290.970276]        entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 3290.970284]
               other info that might help us debug this:

[ 3290.970285] Chain exists of:
                 (wq_completion)hci0#2 --> &hdev->lock --> dpm_list_mtx

[ 3290.970297]  Possible unsafe locking scenario:

[ 3290.970299]        CPU0                    CPU1
[ 3290.970300]        ----                    ----
[ 3290.970302]   lock(dpm_list_mtx);
[ 3290.970306]                                lock(&hdev->lock);
[ 3290.970310]                                lock(dpm_list_mtx);
[ 3290.970314]   lock((wq_completion)hci0#2);
[ 3290.970319]
                *** DEADLOCK ***

[ 3290.970321] 7 locks held by systemd-sleep/4553:
[ 3290.970325]  #0: ffff888103bcd448 (sb_writers#4){.+.+}-{0:0}, at: ksys_write+0xa7/0xe0
[ 3290.970341]  #1: ffff888115a14488 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x103/0x1b0
[ 3290.970355]  #2: ffff888100f719e0 (kn->active#233){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x10c/0x1b0
[ 3290.970369]  #3: ffffffff82661048 (autosleep_lock){+.+.}-{3:3}, at: state_store+0x12/0x90
[ 3290.970384]  #4: ffffffff82658ac8 (system_transition_mutex){+.+.}-{3:3}, at: pm_suspend+0x9f/0x310
[ 3290.970399]  #5: ffffffff827f2a48 (acpi_scan_lock){+.+.}-{3:3}, at: acpi_suspend_begin+0x4c/0x80
[ 3290.970416]  #6: ffffffff8280fca8 (dpm_list_mtx){+.+.}-{3:3}, at: dpm_resume+0x12e/0x3e0
[ 3290.970428]
               stack backtrace:
[ 3290.970431] CPU: 3 PID: 4553 Comm: systemd-sleep Tainted: G S                5.15.0+ #2420
[ 3290.970438] Hardware name: Dell Inc. XPS 13 9380/0RYJWW, BIOS 1.5.0 06/03/2019
[ 3290.970441] Call Trace:
[ 3290.970446]  dump_stack_lvl+0x44/0x57
[ 3290.970454]  check_noncircular+0x105/0x120
[ 3290.970468]  ? __lock_acquire+0x15cb/0x1b50
[ 3290.970474]  __lock_acquire+0x15cb/0x1b50
[ 3290.970487]  lock_acquire+0x26c/0x300
[ 3290.970493]  ? flush_workqueue+0x87/0x4a0
[ 3290.970503]  ? __raw_spin_lock_init+0x3b/0x60
[ 3290.970510]  ? lockdep_init_map_type+0x58/0x240
[ 3290.970519]  flush_workqueue+0xae/0x4a0
[ 3290.970526]  ? flush_workqueue+0x87/0x4a0
[ 3290.970544]  ? drain_workqueue+0xa1/0x130
[ 3290.970552]  drain_workqueue+0xa1/0x130
[ 3290.970561]  destroy_workqueue+0x34/0x1f0
[ 3290.970572]  hci_release_dev+0x49/0x180 [bluetooth]
[ 3290.970624]  bt_host_release+0x1d/0x30 [bluetooth]
[ 3290.970687]  device_release+0x33/0x90
[ 3290.970695]  kobject_release+0x63/0x160
[ 3290.970705]  dpm_resume+0x164/0x3e0
[ 3290.970710]  ? dpm_resume_early+0x251/0x3b0
[ 3290.970718]  dpm_resume_end+0xd/0x20
[ 3290.970723]  suspend_devices_and_enter+0x1a4/0xba0
[ 3290.970737]  pm_suspend+0x26b/0x310
[ 3290.970746]  state_store+0x42/0x90
[ 3290.970755]  kernfs_fop_write_iter+0x135/0x1b0
[ 3290.970764]  new_sync_write+0x125/0x1c0
[ 3290.970777]  vfs_write+0x360/0x3c0
[ 3290.970785]  ksys_write+0xa7/0xe0
[ 3290.970794]  do_syscall_64+0x3a/0x80
[ 3290.970803]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 3290.970811] RIP: 0033:0x7f41b1328164
[ 3290.970819] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 80 00 00 00 00 8b 05 4a d2 2c 00 48 63 ff 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 f3 c3 66 90 55 53 48 89 d5 48 89 f3 48 83
[ 3290.970824] RSP: 002b:00007ffe6ae21b28 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 3290.970831] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f41b1328164
[ 3290.970836] RDX: 0000000000000004 RSI: 000055965e651070 RDI: 0000000000000004
[ 3290.970839] RBP: 000055965e651070 R08: 000055965e64f390 R09: 00007f41b1e3d1c0
[ 3290.970843] R10: 000000000000000a R11: 0000000000000246 R12: 0000000000000004
[ 3290.970846] R13: 0000000000000001 R14: 000055965e64f2b0 R15: 0000000000000004

Cc: All applicable <[email protected]>
Signed-off-by: Rafael J. Wysocki <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
jnettlet pushed a commit that referenced this issue Dec 10, 2021
[ Upstream commit 54659ca ]

When turning off a connection, lockdep complains with the following
warning (here it was triggered by a modprobe, but the same happens on
a disconnection from NetworkManager; anything that ends up calling
cfg80211_disconnect is enough):

[  682.855867] ======================================================
[  682.855877] WARNING: possible circular locking dependency detected
[  682.855887] 5.14.0-rc6+ #16 Tainted: G         C OE
[  682.855898] ------------------------------------------------------
[  682.855906] modprobe/1770 is trying to acquire lock:
[  682.855916] ffffb6d000332b00 (&pxmitpriv->lock){+.-.}-{2:2},
		at: rtw_free_stainfo+0x52/0x4a0 [r8723bs]
[  682.856073]
               but task is already holding lock:
[  682.856081] ffffb6d0003336a8 (&pstapriv->sta_hash_lock){+.-.}-{2:2},
		at: rtw_free_assoc_resources+0x48/0x110 [r8723bs]
[  682.856207]
               which lock already depends on the new lock.

[  682.856215]
               the existing dependency chain (in reverse order) is:
[  682.856223]
               -> #1 (&pstapriv->sta_hash_lock){+.-.}-{2:2}:
[  682.856247]        _raw_spin_lock_bh+0x34/0x40
[  682.856265]        rtw_get_stainfo+0x9a/0x110 [r8723bs]
[  682.856389]        rtw_xmit_classifier+0x27/0x130 [r8723bs]
[  682.856515]        rtw_xmitframe_enqueue+0xa/0x20 [r8723bs]
[  682.856642]        rtl8723bs_hal_xmit+0x3b/0xb0 [r8723bs]
[  682.856752]        rtw_xmit+0x4ef/0x890 [r8723bs]
[  682.856879]        _rtw_xmit_entry+0xba/0x350 [r8723bs]
[  682.856981]        dev_hard_start_xmit+0xee/0x320
[  682.856999]        sch_direct_xmit+0x8c/0x330
[  682.857014]        __dev_queue_xmit+0xba5/0xf00
[  682.857030]        packet_sendmsg+0x981/0x1b80
[  682.857047]        sock_sendmsg+0x5b/0x60
[  682.857060]        __sys_sendto+0xf1/0x160
[  682.857073]        __x64_sys_sendto+0x24/0x30
[  682.857087]        do_syscall_64+0x3a/0x80
[  682.857102]        entry_SYSCALL_64_after_hwframe+0x44/0xae
[  682.857117]
               -> #0 (&pxmitpriv->lock){+.-.}-{2:2}:
[  682.857142]        __lock_acquire+0xfd9/0x1b50
[  682.857158]        lock_acquire+0xb4/0x2c0
[  682.857172]        _raw_spin_lock_bh+0x34/0x40
[  682.857185]        rtw_free_stainfo+0x52/0x4a0 [r8723bs]
[  682.857308]        rtw_free_assoc_resources+0x53/0x110 [r8723bs]
[  682.857415]        cfg80211_rtw_disconnect+0x4b/0x70 [r8723bs]
[  682.857522]        cfg80211_disconnect+0x12e/0x2f0 [cfg80211]
[  682.857759]        cfg80211_leave+0x2b/0x40 [cfg80211]
[  682.857961]        cfg80211_netdev_notifier_call+0xa9/0x560 [cfg80211]
[  682.858163]        raw_notifier_call_chain+0x41/0x50
[  682.858180]        __dev_close_many+0x62/0x100
[  682.858195]        dev_close_many+0x7d/0x120
[  682.858209]        unregister_netdevice_many+0x416/0x680
[  682.858225]        unregister_netdevice_queue+0xab/0xf0
[  682.858240]        unregister_netdev+0x18/0x20
[  682.858255]        rtw_unregister_netdevs+0x28/0x40 [r8723bs]
[  682.858360]        rtw_dev_remove+0x24/0xd0 [r8723bs]
[  682.858463]        sdio_bus_remove+0x31/0xd0 [mmc_core]
[  682.858532]        device_release_driver_internal+0xf7/0x1d0
[  682.858550]        driver_detach+0x47/0x90
[  682.858564]        bus_remove_driver+0x77/0xd0
[  682.858579]        rtw_drv_halt+0xc/0x678 [r8723bs]
[  682.858685]        __x64_sys_delete_module+0x13f/0x250
[  682.858699]        do_syscall_64+0x3a/0x80
[  682.858715]        entry_SYSCALL_64_after_hwframe+0x44/0xae
[  682.858729]
               other info that might help us debug this:

[  682.858737]  Possible unsafe locking scenario:

[  682.858744]        CPU0                    CPU1
[  682.858751]        ----                    ----
[  682.858758]   lock(&pstapriv->sta_hash_lock);
[  682.858772]                                lock(&pxmitpriv->lock);
[  682.858786]                                lock(&pstapriv->sta_hash_lock);
[  682.858799]   lock(&pxmitpriv->lock);
[  682.858812]
                *** DEADLOCK ***

[  682.858820] 5 locks held by modprobe/1770:
[  682.858831]  #0: ffff8d870697d980 (&dev->mutex){....}-{3:3},
		at: device_release_driver_internal+0x1a/0x1d0
[  682.858869]  #1: ffffffffbdbbf1c8 (rtnl_mutex){+.+.}-{3:3},
		at: unregister_netdev+0xe/0x20
[  682.858906]  #2: ffff8d87054ee5e8 (&rdev->wiphy.mtx){+.+.}-{3:3},
		at: cfg80211_netdev_notifier_call+0x9e/0x560 [cfg80211]
[  682.859131]  #3: ffff8d870f2bc8f0 (&wdev->mtx){+.+.}-{3:3},
		at: cfg80211_leave+0x20/0x40 [cfg80211]
[  682.859354]  #4: ffffb6d0003336a8 (&pstapriv->sta_hash_lock){+.-.}-{2:2},
		at: rtw_free_assoc_resources+0x48/0x110 [r8723bs]
[  682.859482]
               stack backtrace:
[  682.859491] CPU: 1 PID: 1770 Comm: modprobe Tainted: G
		C OE     5.14.0-rc6+ #16
[  682.859507] Hardware name: LENOVO 80NR/Madrid, BIOS DACN25WW 08/20/2015
[  682.859517] Call Trace:
[  682.859531]  dump_stack_lvl+0x56/0x6f
[  682.859551]  check_noncircular+0xdb/0xf0
[  682.859579]  __lock_acquire+0xfd9/0x1b50
[  682.859606]  lock_acquire+0xb4/0x2c0
[  682.859623]  ? rtw_free_stainfo+0x52/0x4a0 [r8723bs]
[  682.859752]  ? mark_held_locks+0x48/0x70
[  682.859769]  ? rtw_free_stainfo+0x4a/0x4a0 [r8723bs]
[  682.859898]  _raw_spin_lock_bh+0x34/0x40
[  682.859914]  ? rtw_free_stainfo+0x52/0x4a0 [r8723bs]
[  682.860039]  rtw_free_stainfo+0x52/0x4a0 [r8723bs]
[  682.860171]  rtw_free_assoc_resources+0x53/0x110 [r8723bs]
[  682.860286]  cfg80211_rtw_disconnect+0x4b/0x70 [r8723bs]
[  682.860397]  cfg80211_disconnect+0x12e/0x2f0 [cfg80211]
[  682.860629]  cfg80211_leave+0x2b/0x40 [cfg80211]
[  682.860836]  cfg80211_netdev_notifier_call+0xa9/0x560 [cfg80211]
[  682.861048]  ? __lock_acquire+0x4dc/0x1b50
[  682.861070]  ? lock_is_held_type+0xa8/0x110
[  682.861089]  ? lock_is_held_type+0xa8/0x110
[  682.861104]  ? find_held_lock+0x2d/0x90
[  682.861120]  ? packet_notifier+0x173/0x300
[  682.861141]  ? lock_release+0xb3/0x250
[  682.861160]  ? packet_notifier+0x192/0x300
[  682.861184]  raw_notifier_call_chain+0x41/0x50
[  682.861205]  __dev_close_many+0x62/0x100
[  682.861224]  dev_close_many+0x7d/0x120
[  682.861245]  unregister_netdevice_many+0x416/0x680
[  682.861264]  ? find_held_lock+0x2d/0x90
[  682.861284]  unregister_netdevice_queue+0xab/0xf0
[  682.861306]  unregister_netdev+0x18/0x20
[  682.861325]  rtw_unregister_netdevs+0x28/0x40 [r8723bs]
[  682.861434]  rtw_dev_remove+0x24/0xd0 [r8723bs]
[  682.861542]  sdio_bus_remove+0x31/0xd0 [mmc_core]
[  682.861615]  device_release_driver_internal+0xf7/0x1d0
[  682.861637]  driver_detach+0x47/0x90
[  682.861656]  bus_remove_driver+0x77/0xd0
[  682.861674]  rtw_drv_halt+0xc/0x678 [r8723bs]
[  682.861782]  __x64_sys_delete_module+0x13f/0x250
[  682.861801]  ? lockdep_hardirqs_on_prepare+0xf3/0x170
[  682.861817]  ? syscall_enter_from_user_mode+0x20/0x70
[  682.861836]  do_syscall_64+0x3a/0x80
[  682.861855]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[  682.861873] RIP: 0033:0x7f6dbe85400b
[  682.861890] Code: 73 01 c3 48 8b 0d 6d 1e 0c 00 f7 d8 64 89
01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa
b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 3d
1e 0c 00 f7 d8 64 89 01 48
[  682.861906] RSP: 002b:00007ffe7a82f538 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
[  682.861923] RAX: ffffffffffffffda RBX: 000055a64693bd20 RCX: 00007f6dbe85400b
[  682.861935] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 000055a64693bd88
[  682.861946] RBP: 000055a64693bd20 R08: 0000000000000000 R09: 0000000000000000
[  682.861957] R10: 00007f6dbe8c7ac0 R11: 0000000000000206 R12: 000055a64693bd88
[  682.861967] R13: 0000000000000000 R14: 000055a64693bd88 R15: 00007ffe7a831848

This happens because a frame is enqueued for transmission
under the xmit_priv lock, and rtw_get_stainfo (needed for
enqueuing) then takes sta_hash_lock, which establishes the
following lock dependency:

xmit_priv->lock -> sta_hash_lock

Turning off a connection calls rtw_free_assoc_resources,
which sets up the inverse dependency:

sta_hash_lock -> xmit_priv->lock

This could lead to a deadlock, as lockdep complains.

Fix it by removing the xmit_priv->lock around the
rtw_xmitframe_enqueue call inside rtl8723bs_hal_xmit and
taking it in a smaller critical section inside
rtw_xmit_classifier, the only place where xmit_priv data
are actually accessed.

Replace spin_{lock,unlock}_bh(pxmitpriv->lock) in the other
tx paths leading to the rtw_xmitframe_enqueue call with
spin_{lock,unlock}_bh(psta->sleep_q.lock) - it's not clear
why access to a sleep_q was protected by a spinlock on
xmitpriv->lock.

This avoids the same faulty lock nesting order.
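
A rough before/after sketch of the lock placement (variable names are
approximated from the r8723bs driver, not the literal upstream diff):

  /* Before: the lock is held across the whole enqueue, so
   * rtw_get_stainfo() nests sta_hash_lock inside pxmitpriv->lock. */
  spin_lock_bh(&pxmitpriv->lock);
  err = rtw_xmitframe_enqueue(padapter, pxmitframe);
  spin_unlock_bh(&pxmitpriv->lock);

  /* After: the enqueue itself runs unlocked; only the code inside
   * rtw_xmit_classifier() that actually touches xmit_priv data takes
   * pxmitpriv->lock, in a small critical section that never acquires
   * sta_hash_lock. */
  err = rtw_xmitframe_enqueue(padapter, pxmitframe);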

Extra changes in v2 by Hans de Goede:
- Lift the taking of the struct __queue.lock spinlock out of
  rtw_free_xmitframe_queue() into the callers; this also allows
  protecting a bunch of related state in rtw_free_stainfo():
- Protect psta->sleepq_len on rtw_free_xmitframe_queue(&psta->sleep_q);
- Protect struct tx_servq.tx_pending and tx_servq.qcnt when
  calling rtw_free_xmitframe_queue(&tx_servq.sta_pending)
- This also allows moving the spin_lock_bh(&pxmitpriv->lock); to below
  the sleep_q free-ing code, avoiding another ABBA locking issue

CC: Larry Finger <[email protected]>
Co-developed-by: Hans de Goede <[email protected]>
Tested-on: Lenovo Ideapad MiiX 300-10IBY
Signed-off-by: Fabio Aiuto <[email protected]>
Signed-off-by: Hans de Goede <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Greg Kroah-Hartman <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
jnettlet pushed a commit that referenced this issue Mar 28, 2022
[ Upstream commit 4cb9a99 ]

I saw the splat below after the host suspended and resumed.

   WARNING: CPU: 0 PID: 2943 at kvm/arch/x86/kvm/../../../virt/kvm/kvm_main.c:5531 kvm_resume+0x2c/0x30 [kvm]
   CPU: 0 PID: 2943 Comm: step_after_susp Tainted: G        W IOE     5.17.0-rc3+ #4
   RIP: 0010:kvm_resume+0x2c/0x30 [kvm]
   Call Trace:
    <TASK>
    syscore_resume+0x90/0x340
    suspend_devices_and_enter+0xaee/0xe90
    pm_suspend.cold+0x36b/0x3c2
    state_store+0x82/0xf0
    kernfs_fop_write_iter+0x1b6/0x260
    new_sync_write+0x258/0x370
    vfs_write+0x33f/0x510
    ksys_write+0xc9/0x160
    do_syscall_64+0x3b/0xc0
    entry_SYSCALL_64_after_hwframe+0x44/0xae

lockdep_is_held() can return -1 when lockdep is disabled, which triggers
this warning.  Let's use lockdep_assert_not_held(), which can detect
incorrect calls while holding a lock and also avoids false negatives
when lockdep is disabled.
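
A minimal sketch of the change (the exact call site inside kvm_resume()
is approximated):

  /* Before: fires spuriously, because lockdep_is_held() can return -1
   * (a truthy value) when lockdep is disabled. */
  WARN_ON(lockdep_is_held(&kvm_count_lock));

  /* After: a no-op when lockdep is disabled, while still flagging the
   * case where the lock really is held. */
  lockdep_assert_not_held(&kvm_count_lock);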

Signed-off-by: Wanpeng Li <[email protected]>
Message-Id: <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
jnettlet pushed a commit that referenced this issue Sep 29, 2022
[ Upstream commit 84a5358 ]

The SRv6 layer allows defining HMAC data that can later be used to sign IPv6
Segment Routing Headers. This configuration is realised via netlink through
four attributes: SEG6_ATTR_HMACKEYID, SEG6_ATTR_SECRET, SEG6_ATTR_SECRETLEN and
SEG6_ATTR_ALGID. Because the SECRETLEN attribute is decoupled from the actual
length of the SECRET attribute, it is possible to provide invalid combinations
(e.g., secret = "", secretlen = 64). This case is not checked in the code and
with an appropriately crafted netlink message, an out-of-bounds read of up
to 64 bytes (max secret length) can occur past the skb end pointer and into
skb_shared_info:

Breakpoint 1, seg6_genl_sethmac (skb=<optimized out>, info=<optimized out>) at net/ipv6/seg6.c:208
208		memcpy(hinfo->secret, secret, slen);
(gdb) bt
 #0  seg6_genl_sethmac (skb=<optimized out>, info=<optimized out>) at net/ipv6/seg6.c:208
 #1  0xffffffff81e012e9 in genl_family_rcv_msg_doit (skb=skb@entry=0xffff88800b1f9f00, nlh=nlh@entry=0xffff88800b1b7600,
    extack=extack@entry=0xffffc90000ba7af0, ops=ops@entry=0xffffc90000ba7a80, hdrlen=4, net=0xffffffff84237580 <init_net>, family=<optimized out>,
    family=<optimized out>) at net/netlink/genetlink.c:731
 #2  0xffffffff81e01435 in genl_family_rcv_msg (extack=0xffffc90000ba7af0, nlh=0xffff88800b1b7600, skb=0xffff88800b1f9f00,
    family=0xffffffff82fef6c0 <seg6_genl_family>) at net/netlink/genetlink.c:775
 #3  genl_rcv_msg (skb=0xffff88800b1f9f00, nlh=0xffff88800b1b7600, extack=0xffffc90000ba7af0) at net/netlink/genetlink.c:792
 #4  0xffffffff81dfffc3 in netlink_rcv_skb (skb=skb@entry=0xffff88800b1f9f00, cb=cb@entry=0xffffffff81e01350 <genl_rcv_msg>)
    at net/netlink/af_netlink.c:2501
 #5  0xffffffff81e00919 in genl_rcv (skb=0xffff88800b1f9f00) at net/netlink/genetlink.c:803
 #6  0xffffffff81dff6ae in netlink_unicast_kernel (ssk=0xffff888010eec800, skb=0xffff88800b1f9f00, sk=0xffff888004aed000)
    at net/netlink/af_netlink.c:1319
 #7  netlink_unicast (ssk=ssk@entry=0xffff888010eec800, skb=skb@entry=0xffff88800b1f9f00, portid=portid@entry=0, nonblock=<optimized out>)
    at net/netlink/af_netlink.c:1345
 #8  0xffffffff81dff9a4 in netlink_sendmsg (sock=<optimized out>, msg=0xffffc90000ba7e48, len=<optimized out>) at net/netlink/af_netlink.c:1921
...
(gdb) p/x ((struct sk_buff *)0xffff88800b1f9f00)->head + ((struct sk_buff *)0xffff88800b1f9f00)->end
$1 = 0xffff88800b1b76c0
(gdb) p/x secret
$2 = 0xffff88800b1b76c0
(gdb) p slen
$3 = 64 '@'

The OOB data can then be read back from userspace by dumping HMAC state. This
commit fixes this by ensuring SECRETLEN cannot exceed the actual length of
SECRET.
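
A minimal sketch of the added validation in seg6_genl_sethmac()
(variable names and the error path are approximated):

  slen = nla_get_u8(info->attrs[SEG6_ATTR_SECRETLEN]);
  secret = (char *)nla_data(info->attrs[SEG6_ATTR_SECRET]);

  /* Reject a SECRETLEN larger than what the SECRET attribute actually
   * carries, so the memcpy() below cannot read past the skb data. */
  if (slen > nla_len(info->attrs[SEG6_ATTR_SECRET])) {
      err = -EINVAL;
      goto out_unlock;
  }
  ...
  memcpy(hinfo->secret, secret, slen);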

Reported-by: Lucas Leong <[email protected]>
Tested: verified that EINVAL is correctly returned when secretlen > len(secret)
Fixes: 4f4853d ("ipv6: sr: implement API to control SR HMAC structure")
Signed-off-by: David Lebrun <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
jnettlet pushed a commit that referenced this issue Sep 29, 2022
[ Upstream commit 0e400d6 ]

Fix a NULL dereference of the struct bonding.rr_tx_counter member: if a
bond is initially created with a mode != zero (Round Robin), the memory
required for the counter is never allocated, and when the mode is later
changed there is no attempt to verify that the memory has been allocated
before switching modes.

This causes the following Oops on an aarch64 machine:
    [  334.686773] Unable to handle kernel paging request at virtual address ffff2c91ac905000
    [  334.694703] Mem abort info:
    [  334.697486]   ESR = 0x0000000096000004
    [  334.701234]   EC = 0x25: DABT (current EL), IL = 32 bits
    [  334.706536]   SET = 0, FnV = 0
    [  334.709579]   EA = 0, S1PTW = 0
    [  334.712719]   FSC = 0x04: level 0 translation fault
    [  334.717586] Data abort info:
    [  334.720454]   ISV = 0, ISS = 0x00000004
    [  334.724288]   CM = 0, WnR = 0
    [  334.727244] swapper pgtable: 4k pages, 48-bit VAs, pgdp=000008044d662000
    [  334.733944] [ffff2c91ac905000] pgd=0000000000000000, p4d=0000000000000000
    [  334.740734] Internal error: Oops: 96000004 [#1] SMP
    [  334.745602] Modules linked in: bonding tls veth rfkill sunrpc arm_spe_pmu vfat fat acpi_ipmi ipmi_ssif ixgbe igb i40e mdio ipmi_devintf ipmi_msghandler arm_cmn arm_dsu_pmu cppc_cpufreq acpi_tad fuse zram crct10dif_ce ast ghash_ce sbsa_gwdt nvme drm_vram_helper drm_ttm_helper nvme_core ttm xgene_hwmon
    [  334.772217] CPU: 7 PID: 2214 Comm: ping Not tainted 6.0.0-rc4-00133-g64ae13ed4784 #4
    [  334.779950] Hardware name: GIGABYTE R272-P31-00/MP32-AR1-00, BIOS F18v (SCP: 1.08.20211002) 12/01/2021
    [  334.789244] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
    [  334.796196] pc : bond_rr_gen_slave_id+0x40/0x124 [bonding]
    [  334.801691] lr : bond_xmit_roundrobin_slave_get+0x38/0xdc [bonding]
    [  334.807962] sp : ffff8000221733e0
    [  334.811265] x29: ffff8000221733e0 x28: ffffdbac8572d198 x27: ffff80002217357c
    [  334.818392] x26: 000000000000002a x25: ffffdbacb33ee000 x24: ffff07ff980fa000
    [  334.825519] x23: ffffdbacb2e398ba x22: ffff07ff98102000 x21: ffff07ff981029c0
    [  334.832646] x20: 0000000000000001 x19: ffff07ff981029c0 x18: 0000000000000014
    [  334.839773] x17: 0000000000000000 x16: ffffdbacb1004364 x15: 0000aaaabe2f5a62
    [  334.846899] x14: ffff07ff8e55d968 x13: ffff07ff8e55db30 x12: 0000000000000000
    [  334.854026] x11: ffffdbacb21532e8 x10: 0000000000000001 x9 : ffffdbac857178ec
    [  334.861153] x8 : ffff07ff9f6e5a28 x7 : 0000000000000000 x6 : 000000007c2b3742
    [  334.868279] x5 : ffff2c91ac905000 x4 : ffff2c91ac905000 x3 : ffff07ff9f554400
    [  334.875406] x2 : ffff2c91ac905000 x1 : 0000000000000001 x0 : ffff07ff981029c0
    [  334.882532] Call trace:
    [  334.884967]  bond_rr_gen_slave_id+0x40/0x124 [bonding]
    [  334.890109]  bond_xmit_roundrobin_slave_get+0x38/0xdc [bonding]
    [  334.896033]  __bond_start_xmit+0x128/0x3a0 [bonding]
    [  334.901001]  bond_start_xmit+0x54/0xb0 [bonding]
    [  334.905622]  dev_hard_start_xmit+0xb4/0x220
    [  334.909798]  __dev_queue_xmit+0x1a0/0x720
    [  334.913799]  arp_xmit+0x3c/0xbc
    [  334.916932]  arp_send_dst+0x98/0xd0
    [  334.920410]  arp_solicit+0xe8/0x230
    [  334.923888]  neigh_probe+0x60/0xb0
    [  334.927279]  __neigh_event_send+0x3b0/0x470
    [  334.931453]  neigh_resolve_output+0x70/0x90
    [  334.935626]  ip_finish_output2+0x158/0x514
    [  334.939714]  __ip_finish_output+0xac/0x1a4
    [  334.943800]  ip_finish_output+0x40/0xfc
    [  334.947626]  ip_output+0xf8/0x1a4
    [  334.950931]  ip_send_skb+0x5c/0x100
    [  334.954410]  ip_push_pending_frames+0x3c/0x60
    [  334.958758]  raw_sendmsg+0x458/0x6d0
    [  334.962325]  inet_sendmsg+0x50/0x80
    [  334.965805]  sock_sendmsg+0x60/0x6c
    [  334.969286]  __sys_sendto+0xc8/0x134
    [  334.972853]  __arm64_sys_sendto+0x34/0x4c
    [  334.976854]  invoke_syscall+0x78/0x100
    [  334.980594]  el0_svc_common.constprop.0+0x4c/0xf4
    [  334.985287]  do_el0_svc+0x38/0x4c
    [  334.988591]  el0_svc+0x34/0x10c
    [  334.991724]  el0t_64_sync_handler+0x11c/0x150
    [  334.996072]  el0t_64_sync+0x190/0x194
    [  334.999726] Code: b9001062 f9403c02 d53cd044 8b040042 (b8210040)
    [  335.005810] ---[ end trace 0000000000000000 ]---
    [  335.010416] Kernel panic - not syncing: Oops: Fatal exception in interrupt
    [  335.017279] SMP: stopping secondary CPUs
    [  335.021374] Kernel Offset: 0x5baca8eb0000 from 0xffff800008000000
    [  335.027456] PHYS_OFFSET: 0x80000000
    [  335.030932] CPU features: 0x0000,0085c029,19805c82
    [  335.035713] Memory Limit: none
    [  335.038756] Rebooting in 180 seconds..

The fix is to allocate the memory in bond_open(), which is guaranteed
to be called before any packets are processed.
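
A minimal sketch of that allocation (field and helper names are taken
from the bonding driver, the exact placement is approximated):

  static int bond_open(struct net_device *bond_dev)
  {
      struct bonding *bond = netdev_priv(bond_dev);

      /* Allocate the per-cpu round-robin counter here, so it exists
       * even when the bond was created in another mode and switched
       * to Round Robin while down. */
      if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN && !bond->rr_tx_counter) {
          bond->rr_tx_counter = alloc_percpu(u32);
          if (!bond->rr_tx_counter)
              return -ENOMEM;
      }

      /* ... rest of bond_open() ... */
      return 0;
  }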

Fixes: 848ca91 ("net: bonding: Use per-cpu rr_tx_counter")
CC: Jussi Maki <[email protected]>
Signed-off-by: Jonathan Toppins <[email protected]>
Acked-by: Jay Vosburgh <[email protected]>
Signed-off-by: Jakub Kicinski <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
jnettlet pushed a commit that referenced this issue Sep 29, 2022
commit 29a5b8a upstream.

When walking through an inode's extents, the ext4_ext_binsearch_idx() function
assumes that the extent header has been previously validated.  However, there
are no checks verifying that the number of entries (eh->eh_entries) is
non-zero when the depth is > 0.  This leads to problems because
EXT_FIRST_INDEX() and EXT_LAST_INDEX() then return garbage, resulting in this:

[  135.245946] ------------[ cut here ]------------
[  135.247579] kernel BUG at fs/ext4/extents.c:2258!
[  135.249045] invalid opcode: 0000 [#1] PREEMPT SMP
[  135.250320] CPU: 2 PID: 238 Comm: tmp118 Not tainted 5.19.0-rc8+ #4
[  135.252067] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b-rebuilt.opensuse.org 04/01/2014
[  135.255065] RIP: 0010:ext4_ext_map_blocks+0xc20/0xcb0
[  135.256475] Code:
[  135.261433] RSP: 0018:ffffc900005939f8 EFLAGS: 00010246
[  135.262847] RAX: 0000000000000024 RBX: ffffc90000593b70 RCX: 0000000000000023
[  135.264765] RDX: ffff8880038e5f10 RSI: 0000000000000003 RDI: ffff8880046e922c
[  135.266670] RBP: ffff8880046e9348 R08: 0000000000000001 R09: ffff888002ca580c
[  135.268576] R10: 0000000000002602 R11: 0000000000000000 R12: 0000000000000024
[  135.270477] R13: 0000000000000000 R14: 0000000000000024 R15: 0000000000000000
[  135.272394] FS:  00007fdabdc56740(0000) GS:ffff88807dd00000(0000) knlGS:0000000000000000
[  135.274510] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  135.276075] CR2: 00007ffc26bd4f00 CR3: 0000000006261004 CR4: 0000000000170ea0
[  135.277952] Call Trace:
[  135.278635]  <TASK>
[  135.279247]  ? preempt_count_add+0x6d/0xa0
[  135.280358]  ? percpu_counter_add_batch+0x55/0xb0
[  135.281612]  ? _raw_read_unlock+0x18/0x30
[  135.282704]  ext4_map_blocks+0x294/0x5a0
[  135.283745]  ? xa_load+0x6f/0xa0
[  135.284562]  ext4_mpage_readpages+0x3d6/0x770
[  135.285646]  read_pages+0x67/0x1d0
[  135.286492]  ? folio_add_lru+0x51/0x80
[  135.287441]  page_cache_ra_unbounded+0x124/0x170
[  135.288510]  filemap_get_pages+0x23d/0x5a0
[  135.289457]  ? path_openat+0xa72/0xdd0
[  135.290332]  filemap_read+0xbf/0x300
[  135.291158]  ? _raw_spin_lock_irqsave+0x17/0x40
[  135.292192]  new_sync_read+0x103/0x170
[  135.293014]  vfs_read+0x15d/0x180
[  135.293745]  ksys_read+0xa1/0xe0
[  135.294461]  do_syscall_64+0x3c/0x80
[  135.295284]  entry_SYSCALL_64_after_hwframe+0x46/0xb0

This patch simply adds an extra check in __ext4_ext_check(), verifying that
eh_entries is not 0 when eh_depth is > 0.
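
A minimal sketch of the added check in __ext4_ext_check() (error
handling approximated to match the surrounding validation style):

  if (unlikely(eh->eh_entries == 0 && depth > 0)) {
      error_msg = "eh_entries is 0 but eh_depth is > 0";
      goto corrupted;
  }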

Link: https://bugzilla.kernel.org/show_bug.cgi?id=215941
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216283
Cc: Baokun Li <[email protected]>
Cc: [email protected]
Signed-off-by: Luís Henriques <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
Reviewed-by: Baokun Li <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Theodore Ts'o <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>