summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
dcffe94)
Observed issue
==============
On some systems (e.g. sles15sp4, RHEL 7), test_per_application_leaks
would fail spuriously.
Cause
=====
When walking the FDs in `/proc/XX/fd`, the symbolic links may still exist
but point to files that have already been unlinked. E.g.
```
lrwx------ 1 root root 64 May 22 14:49 /proc/83578/fd/58 -> '/dev/shm/shm-ust-consumer-83578 (deleted)'
```
Solution
========
The test has been modified to loop, waiting for the post-destroy shm
count to drop back to the post-start count. In the case of a failure,
the test will hang forever, but it no longer fails spuriously.
Known drawbacks
===============
None.
Change-Id: Id3c8a9f6db83fe888e79b8f06cb8308b4d90da87
Signed-off-by: Kienan Stewart <kstewart@efficios.com>
Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
import pathlib
import subprocess
import sys
import pathlib
import subprocess
import sys
test_utils_import_path = pathlib.Path(__file__).absolute().parents[3] / "utils"
sys.path.append(str(test_utils_import_path))
test_utils_import_path = pathlib.Path(__file__).absolute().parents[3] / "utils"
sys.path.append(str(test_utils_import_path))
def get_consumerd_pid(tap, parent, match_string):
def get_consumerd_pid(tap, parent, match_string):
try:
process = subprocess.Popen(
["pgrep", "-P", str(parent), "-f", match_string],
try:
process = subprocess.Popen(
["pgrep", "-P", str(parent), "-f", match_string],
)
process.wait()
output = str(process.stdout.read(), encoding="UTF-8").splitlines()
)
process.wait()
output = str(process.stdout.read(), encoding="UTF-8").splitlines()
raise Exception(
"Unexpected number of output lines (got {}): {}".format(
len(output), output
)
)
raise Exception(
"Unexpected number of output lines (got {}): {}".format(
len(output), output
)
)
+ elif len(output) == 1:
+ pid = int(output[0])
except Exception as e:
tap.diagnostic(
"Failed to find child process of '{}' matching '{}': '{}'".format(
except Exception as e:
tap.diagnostic(
"Failed to find child process of '{}' matching '{}': '{}'".format(
def count_process_dev_shm_fds(pid):
count = 0
def count_process_dev_shm_fds(pid):
count = 0
return count
dir = os.path.join("/proc", str(pid), "fd")
for root, dirs, files in os.walk(dir):
for f in files:
filename = pathlib.Path(os.path.join(root, f))
try:
return count
dir = os.path.join("/proc", str(pid), "fd")
for root, dirs, files in os.walk(dir):
for f in files:
filename = pathlib.Path(os.path.join(root, f))
try:
+ # The symlink in /proc/PID may exist, but point to an unlinked
+ # file - shm_unlink is called but either the kernel hasn't yet
+ # finished the clean-up or the consumer hasn't called close()
+ # on the FD yet.
if filename.is_symlink() and str(filename.resolve()).startswith(
"/dev/shm/shm-ust-consumer"
):
count += 1
except FileNotFoundError:
if filename.is_symlink() and str(filename.resolve()).startswith(
"/dev/shm/shm-ust-consumer"
):
count += 1
except FileNotFoundError:
- # As we're walking /proc/XX/fd/, fds may be added or removed
+ # As /proc/XX/fd/ is being walked, fds may be added or removed
session.stop()
session.destroy()
session.stop()
session.destroy()
- count_post_destroy = count_dev_shm_fds(tap, test_env)
+ # As there is no method to know exactly when the final close of the
+ # shm happens (it is timing dependent from an external point of view),
+ # this test iterates, waiting for the post-destroy count to reach the
+ # post-start count. In case of a failure, this will loop infinitely.
+ tap.diagnostic(
+ "Waiting for post-destroy shm count to drop back to post-start level"
+ )
+ while True:
+ count_post_destroy = count_dev_shm_fds(tap, test_env)
+ if count_post_destroy == count_post_start:
+ break
+ time.sleep(0.1)
tap.diagnostic(
"FD counts post-start: {}, post-destroy: {}".format(
tap.diagnostic(
"FD counts post-start: {}, post-destroy: {}".format(