| #!/usr/bin/env python3 |
| |
| import os |
| import re |
| import shutil |
| import signal |
| import subprocess |
| import sys |
| import tarfile |
| import time |
| |
# Minimum number of free bytes required on the disk holding this script.
MIN_FREE_SPACE = 10 * 1024 ** 3
# Size in bytes at which a perf data file makes perf restart at a lower rate.
MAX_PERF_SIZE = 200 * 1024 ** 2
# Initial perf sampling frequency; it is halved on every restart.
MAX_PERF_FREQ = 100
# Seconds slept between two collection rounds.
WAKEUP_INTERVAL = 60

# Mutable run state shared by the functions in this file.
perf_freq = MAX_PERF_FREQ
logpath = None

# The helper tools are looked up in the directory containing this script.
realpath = os.path.dirname(os.path.realpath(__file__))
perfbin, flamegraph, stackcollapse = (
    os.path.join(realpath, tool)
    for tool in ('perf', 'flamegraph.pl', 'stackcollapse-perf.pl')
)
| |
| |
def check_permissions(params=None):
    """Check that the kernel settings this script depends on are in place.

    Args:
        params: optional mapping of file path to the exact value the first
            line of that file must hold. Defaults to the kptr_restrict and
            perf_event_paranoid settings.

    Returns:
        True if at least one setting is wrong or unreadable (callers treat
        True as "abort"), False if every setting matches.
    """
    if params is None:
        params = {
            '/proc/sys/kernel/kptr_restrict': '0',
            '/proc/sys/kernel/perf_event_paranoid': '-1',
        }

    err = False
    for path, wanted in params.items():
        try:
            with open(path) as f:
                actual = f.readline().strip()
        except OSError as exc:
            # A missing or unreadable file is reported like a wrong value
            # instead of aborting the script with a traceback.
            print('Insufficient permission:', path, 'is unreadable:', exc)
            err = True
            continue
        if actual != wanted:
            print('Insufficient permission:', path, '!=', wanted)
            err = True
    return err
| |
| |
def create_logpath():
    """Create a timestamped log directory next to this script.

    On success the module-level ``logpath`` is set to the new directory.

    Returns:
        True if the disk has less than MIN_FREE_SPACE bytes free (nothing is
        created in that case), False once the directory has been created.
    """
    global logpath

    free_bytes = shutil.disk_usage(realpath).free
    if free_bytes < MIN_FREE_SPACE:
        print('Insufficient disk space: <', MIN_FREE_SPACE)
        return True

    # The directory name encodes month, day and time of day.
    dirname = time.strftime('log_%m%d_%H%M%S')
    logpath = os.path.join(realpath, dirname)
    print('Creating directory', logpath)
    os.mkdir(logpath)
    return False
| |
| |
def save_file(fname_in, suffix=None, allow_failures=False):
    """Copy a file into the log directory.

    Args:
        fname_in: path of the file to copy.
        suffix: optional tag; when given, ``-<suffix>`` is appended to the
            name of the copy. Any value accepted by ``str()`` works.
        allow_failures: if true, a failed copy is reported through the
            return value instead of an exception.

    Returns:
        False if the copy succeeded, True if it failed and failures are
        allowed.

    Raises:
        OSError: the copy failed and ``allow_failures`` is false.
    """
    fname_out = os.path.basename(fname_in)
    if suffix is not None:
        # str() matches run_cmd(), so non-string suffixes work here as well.
        fname_out += '-' + str(suffix)
    fname_out = os.path.join(logpath, fname_out)
    try:
        shutil.copyfile(fname_in, fname_out)
    except OSError:
        if not allow_failures:
            raise
        return True
    return False
| |
| |
def run_cmd(cmd, suffix=None, allow_failures=False, ignore_errors=False):
    """Run a shell command and save its standard output in the log directory.

    The output file is named after the basename of the command's first word,
    followed by ``-<suffix>`` when a suffix is given. No file is written when
    the command prints nothing.

    Args:
        cmd: command line, executed by /bin/bash (pipelines are allowed).
        suffix: optional tag appended to the output file name.
        allow_failures: if true, a non-zero exit status is reported through
            the return value instead of an exception.
        ignore_errors: if true, the command's stderr is discarded. This does
            not affect how a non-zero exit status is handled.

    Returns:
        False if the command succeeded, True if it failed and failures are
        allowed.

    Raises:
        subprocess.CalledProcessError: the command exited with a non-zero
            status and ``allow_failures`` is false.
    """
    stderr = subprocess.DEVNULL if ignore_errors else None

    fname = os.path.basename(cmd.split()[0])
    if suffix is not None:
        fname += '-' + str(suffix)
    fname = os.path.join(logpath, fname)

    try:
        # errors='replace' keeps stray non-UTF-8 bytes in a tool's output
        # from aborting the whole collection with a UnicodeDecodeError.
        output = subprocess.check_output(cmd, shell=True, stderr=stderr,
                                         encoding='utf-8', errors='replace',
                                         executable='/bin/bash')
    except subprocess.CalledProcessError:
        if not allow_failures:
            raise
        return True

    if output:
        # Write with the same encoding the output was decoded with, rather
        # than whatever the locale default happens to be.
        with open(fname, 'w', encoding='utf-8') as f:
            f.write(output)
    return False
| |
| |
def collect_sysinfo():
    """Snapshot static system information into the log directory.

    Copies a fixed set of kernel/procfs files and stores the output of a few
    inventory commands. A failed copy or command raises, because none of the
    calls passes ``allow_failures``.
    """
    kernel_release = os.uname().release
    plain_copies = (
        '/boot/config-' + kernel_release,
        '/proc/cmdline',
        '/proc/cpuinfo',
        '/proc/meminfo',
        '/proc/swaps',
        '/proc/sys/vm/swappiness',
        '/proc/zoneinfo',
    )
    for path in plain_copies:
        save_file(path)

    # This copy is tagged "thp", so it is stored as "enabled-thp".
    save_file('/sys/kernel/mm/transparent_hugepage/enabled', 'thp')

    # ignore_errors only silences the commands' stderr output.
    for command in ('fdisk -l', 'lshw', 'sysctl -a'):
        run_cmd(command, ignore_errors=True)
| |
| |
def stop_perf(perf):
    """Interrupt a perf process and block until it has exited.

    Args:
        perf: process object exposing ``pid`` and ``wait()``, as returned by
            start_perf().
    """
    pid = perf.pid
    # SIGINT is the signal a Control-C would deliver.
    os.kill(pid, signal.SIGINT)
    perf.wait()
| |
| |
def start_perf(freq):
    """Start ``perf record`` in the background at the given sampling frequency.

    The data is written to ``perf-<freq>.data`` inside the log directory.

    Args:
        freq: sampling frequency passed to ``perf record -F``. A falsy value
            (0 or None) means "do not profile".

    Returns:
        The ``subprocess.Popen`` object of the started perf, or None when
        ``freq`` is falsy.
    """
    if not freq:
        return None

    print('Running perf record at frequency', freq)
    fname = os.path.join(logpath, 'perf-%d.data' % freq)
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact, and makes the returned pid perf's own so that
    # signals sent to it reach perf directly.
    cmd = [perfbin, 'record', '-g', '--all-kernel', '--kernel-callchains',
           '-F', '%d' % freq, '-o', fname]
    return subprocess.Popen(cmd)
| |
| |
def restart_perf(perf):
    """Keep perf running, halving its rate once the data file gets too big.

    Args:
        perf: the current perf process, or None if none is running.

    Returns:
        ``perf`` unchanged while its data file is smaller than MAX_PERF_SIZE;
        otherwise whatever start_perf() returns for the current frequency
        (None once the frequency has dropped to 0).
    """
    global perf_freq

    # Nothing running: just (try to) start at the current frequency.
    if not perf:
        return start_perf(perf_freq)

    data_file = os.path.join(logpath, 'perf-%d.data' % perf_freq)
    if os.path.getsize(data_file) < MAX_PERF_SIZE:
        return perf

    # The recording grew too large: stop it and continue at half the rate,
    # which also switches to a new data file name.
    stop_perf(perf)
    perf_freq = perf_freq >> 1
    return start_perf(perf_freq)
| |
| |
def gen_reports():
    """Turn the recorded perf data files into text reports and flame graphs.

    Frequencies are visited from MAX_PERF_FREQ downwards, halving each time,
    until the frequency falls below the current ``perf_freq`` or reaches 0.
    Each data file yields a ``.txt`` report and a ``.svg`` flame graph whose
    names carry a zero-padded running index. Failures are tolerated for
    every frequency except MAX_PERF_FREQ, and the first tolerated failure
    ends the processing.
    """
    index = 0
    freq = MAX_PERF_FREQ
    while freq >= perf_freq:
        tag = '%04d' % index
        data_file = os.path.join(logpath, 'perf-%d.data' % freq)
        optional = freq != MAX_PERF_FREQ

        print('Processing', data_file)
        report_cmd = '%s report -f --call-graph=none -i %s' % (
            perfbin, data_file)
        flame_cmd = '%s script -f -i %s' % (perfbin, data_file)
        flame_cmd += ' | %s | %s' % (stackcollapse, flamegraph)

        # `or` short-circuits, so the flame graph is only attempted when the
        # text report did not fail.
        failed = (run_cmd(report_cmd, tag + '.txt', optional)
                  or run_cmd(flame_cmd, tag + '.svg', optional))
        if failed:
            break

        index += 1
        freq = freq >> 1
        # Without this check a perf_freq of 0 would keep the loop spinning.
        if freq == 0:
            break
| |
| |
def create_tarball():
    """Pack the log directory into ``<logpath>.tar.xz`` and delete the directory.

    Files whose name starts with ``perf-<digits>.data`` are left out of the
    archive.

    Raises:
        FileExistsError: the archive already exists (it is opened in
            exclusive-creation mode).
    """
    archive = logpath + '.tar.xz'
    print('Creating', archive)

    def exclude(tarinfo):
        # The dot is escaped so only real "perf-<n>.data" names are dropped,
        # not names with an arbitrary character in that position.
        if re.match(r'perf-\d+\.data', os.path.basename(tarinfo.name)):
            return None
        return tarinfo

    with tarfile.open(archive, 'x:xz') as tar:
        tar.add(logpath, os.path.basename(logpath), filter=exclude)
    # The directory is removed only after the archive has been closed.
    shutil.rmtree(logpath)
    print('Done')
| |
| |
def main_loop():
    """Collect periodic snapshots until Control-C, then build the results.

    Every WAKEUP_INTERVAL seconds the loop saves /proc/vmstat, the lru_gen
    debugfs file and one batch-mode run of top, each tagged with a
    zero-padded round number, and lets restart_perf() manage the perf
    process. On KeyboardInterrupt perf is stopped, the reports are generated
    and the tarball is created.
    """
    perf = None
    try:
        round_no = 0
        while True:
            tag = '%04d' % round_no
            save_file('/proc/vmstat', tag)
            # A failed copy of this file is tolerated.
            save_file('/sys/kernel/debug/lru_gen_full', tag,
                      allow_failures=True)
            run_cmd('top -b -n 1', tag)
            round_no += 1
            perf = restart_perf(perf)
            time.sleep(WAKEUP_INTERVAL)
    except KeyboardInterrupt:
        if perf:
            try:
                print('Stopping perf')
                stop_perf(perf)
            except Exception as exc:  # pylint: disable=broad-except
                # A failure to stop perf must not prevent the reports.
                print('Unexpected error:', type(exc))
        gen_reports()
        create_tarball()
| |
| |
def main():
    """Run the collector: check prerequisites, then collect until Control-C."""
    print('Press Control-C when you are done')
    # Both setup steps return True on failure; `or` stops at the first one,
    # so the log directory is not created when the permission check fails.
    if check_permissions() or create_logpath():
        return
    collect_sysinfo()
    main_loop()


if __name__ == '__main__':
    main()