実行速度のボトルネックをplopで見つける

2016/10/6の追記

python3 setup.py install

でインストールするだけで良く、下にあるソースコードの変更は不必要になった。

以降は2016/9/5に書いた記事

Plop: Low-overhead profiling for Pythonにあるようにdropboxの中の人が開発したplopというツールがある。

これを使えば、プログラムの累積実行時間をメソッド単位で計測でき、コールグラフも作成出来る。pythonが遅いと感じた時に、簡単にボトルネックになっている箇所を発見出来るので便利！

しかしながら、https://github.com/bdarnell/plopにあるコードはpython2系でしか動かないので、3系でも動くように修正した。

面倒だったので、計測用のコードしか修正してないです。可視化は2系のまま。

使い方

計測

python3 -m plop.collector myscript.py

可視化

python -m plop.viewer --datadir=/tmp

コード

collector.py

from __future__ import with_statement

import collections

import signal

import sys

import _thread as thread

from plop import platform

import time



class Collector(object):

    MODES = {

        'prof': (platform.ITIMER_PROF, signal.SIGPROF),

        'virtual': (platform.ITIMER_VIRTUAL, signal.SIGVTALRM),

        'real': (platform.ITIMER_REAL, signal.SIGALRM),

        }



    def __init__(self, interval=0.01, mode='virtual'):

        self.interval = interval

        self.mode = mode

        assert mode in Collector.MODES

        timer, sig = Collector.MODES[self.mode]

        signal.signal(sig, self.handler)

        self.reset()



    def reset(self):

        # defaultdict instead of counter for pre-2.7 compatibility

        self.stack_counts = collections.defaultdict(int)

        self.samples_remaining = 0

        self.stopping = False

        self.stopped = False



        self.samples_taken = 0

        self.sample_time = 0



    def start(self, duration=30.0):

        self.stopping = False

        self.stopped = False

        self.samples_remaining = int(duration / self.interval)

        timer, sig = Collector.MODES[self.mode]

        platform.setitimer(timer, self.interval, self.interval)



    def stop(self):

        self.stopping = True

        self.wait()



    def wait(self):

        while not self.stopped:

            pass # need busy wait; ITIMER_PROF doesn't proceed while sleeping



    def handler(self, sig, current_frame):

        start = time.time()

        self.samples_remaining -= 1

        if self.samples_remaining <= 0 or self.stopping:

            platform.setitimer(Collector.MODES[self.mode][0], 0, 0)

            self.stopped = True

            return

        current_tid = thread.get_ident()

        for tid, frame in sys._current_frames().items():

            if tid == current_tid:

                frame = current_frame

            frames = []

            while frame is not None:

                code = frame.f_code

                frames.append((code.co_filename, code.co_firstlineno, code.co_name))

                frame = frame.f_back

            self.stack_counts[tuple(frames)] += 1

        end = time.time()

        self.samples_taken += 1

        self.sample_time += (end - start)



    def filter(self, max_stacks):

        self.stack_counts = dict(sorted(self.stack_counts.items(), key=lambda kv: -kv[1])[:max_stacks])





def main():

    # TODO: more options, refactor this into somewhere shared

    # between tornado.autoreload and auto2to3

    if len(sys.argv) >= 3 and sys.argv[1] == '-m':

        mode = 'module'

        module = sys.argv[2]

        del sys.argv[1:3]

    elif len(sys.argv) >= 2:

        mode = "script"

        script = sys.argv[1]

        sys.argv = sys.argv[1:]

    else:

        print("usage: python -m plop.collector -m module_to_run")

        sys.exit(1)

    



    collector = Collector()

    collector.start(duration=3600)

    exit_code = 0

    try:

        if mode == "module":

            import runpy

            runpy.run_module(module, run_name="__main__", alter_sys=True)

        elif mode == "script":

            with open(script) as f:

                global __file__

                __file__ = script

                # Use globals as our "locals" dictionary so that

                # something that tries to import __main__ (e.g. the unittest

                # module) will see the right things.

                exec(f.read(), globals())

    except (SystemExit) as e:

        exit_code = e.code

    collector.stop()

    collector.filter(50)

    with open('/tmp/plop.out', 'w') as f:

        f.write(repr(dict(collector.stack_counts)))

    print("profile output saved to /tmp/plop.out")

    overhead = float(collector.sample_time) / collector.samples_taken

    print("overhead was %s per sample (%s%%)" % (

        overhead, overhead / collector.interval))

    sys.exit(exit_code)





if __name__ == '__main__':

    main()

変更点だけなら

--- plop/collector.py   2015-09-01 16:34:42.000000000 +0900

+++ /usr/local/lib/python3.4/dist-packages/plop/collector.py    2015-09-01 16:36:38.515175381 +0900

@@ -1,20 +1,17 @@

 from __future__ import with_statement

 import collections

-import os

 import signal

 import sys

-import thread

+import _thread as thread

+from plop import platform

 import time

-import argparse

-import plop.platform

-

 

 class Collector(object):

     MODES = {

-        'prof': (plop.platform.ITIMER_PROF, signal.SIGPROF),

-        'virtual': (plop.platform.ITIMER_VIRTUAL, signal.SIGVTALRM),

-        'real': (plop.platform.ITIMER_REAL, signal.SIGALRM),

-    }

+        'prof': (platform.ITIMER_PROF, signal.SIGPROF),

+        'virtual': (platform.ITIMER_VIRTUAL, signal.SIGVTALRM),

+        'real': (platform.ITIMER_REAL, signal.SIGALRM),

+        }

 

     def __init__(self, interval=0.01, mode='virtual'):

         self.interval = interval

@@ -22,11 +19,11 @@

         assert mode in Collector.MODES

         timer, sig = Collector.MODES[self.mode]

         signal.signal(sig, self.handler)

-        signal.siginterrupt(sig, False)

         self.reset()

 

     def reset(self):

-        self.stacks = list()

+        # defaultdict instead of counter for pre-2.7 compatibility

+        self.stack_counts = collections.defaultdict(int)

         self.samples_remaining = 0

         self.stopping = False

         self.stopped = False

@@ -39,7 +36,7 @@

         self.stopped = False

         self.samples_remaining = int(duration / self.interval)

         timer, sig = Collector.MODES[self.mode]

-        plop.platform.setitimer(timer, self.interval, self.interval)

+        platform.setitimer(timer, self.interval, self.interval)

 

     def stop(self):

         self.stopping = True

@@ -47,13 +44,13 @@

 

     def wait(self):

         while not self.stopped:

-            pass  # need busy wait; ITIMER_PROF doesn't proceed while sleeping

+            pass # need busy wait; ITIMER_PROF doesn't proceed while sleeping

 

     def handler(self, sig, current_frame):

         start = time.time()

         self.samples_remaining -= 1

         if self.samples_remaining <= 0 or self.stopping:

-            plop.platform.setitimer(Collector.MODES[self.mode][0], 0, 0)

+            platform.setitimer(Collector.MODES[self.mode][0], 0, 0)

             self.stopped = True

             return

         current_tid = thread.get_ident()

@@ -65,135 +62,56 @@

                 code = frame.f_code

                 frames.append((code.co_filename, code.co_firstlineno, code.co_name))

                 frame = frame.f_back

-            self.stacks.append(frames)

+            self.stack_counts[tuple(frames)] += 1

         end = time.time()

         self.samples_taken += 1

         self.sample_time += (end - start)

 

-

-class CollectorFormatter(object):

-    """

-    Abstract class for output formats

-    """

-    def format(self, collector):

-        raise Exception("not implemented")

-

-    def store(self, collector, filename):

-        with open(filename, "wb") as f:

-            f.write(self.format(collector))

-

-

-class PlopFormatter(CollectorFormatter):

-    """

-    Formats stack frames for plop.viewer

-    """

-    def __init__(self, max_stacks=50):

-        self.max_stacks = 50

-

-    def format(self, collector):

-        # defaultdict instead of counter for pre-2.7 compatibility

-        stack_counts = collections.defaultdict(int)

-        for frames in collector.stacks:

-            stack_counts[tuple(frames)] += 1

-        stack_counts = dict(sorted(stack_counts.iteritems(),

-                                   key=lambda kv: -kv[1])[:self.max_stacks])

-        return repr(stack_counts)

-

-

-class FlamegraphFormatter(CollectorFormatter):

-    """

-    Creates Flamegraph files

-    """

-    def format(self, collector):

-        output = ""

-        previous = None

-        previous_count = 1

-        for stack in collector.stacks:

-            current = self.format_flame(stack)

-            if current == previous:

-                previous_count += 1

-            else:

-                output += "%s %d\n" % (previous, previous_count)

-                previous_count = 1

-                previous = current

-        output += "%s %d\n" % (previous, previous_count)

-        return output

-

-    def format_flame(self, stack):

-        funcs = map("{0[2]} ({0[0]}:{0[1]})".format, reversed(stack))

-        return ";".join(funcs)

+    def filter(self, max_stacks):

+        self.stack_counts = dict(sorted(self.stack_counts.items(), key=lambda kv: -kv[1])[:max_stacks])

 

 

 def main():

     # TODO: more options, refactor this into somewhere shared

     # between tornado.autoreload and auto2to3

-    parser = argparse.ArgumentParser(description="Plop: Python Low-Overhead Profiler",

-                                     prog="python -m plop.collector",

-                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)

-    parser.add_argument("--format", "-f", help="Output format",

-                        choices=["plop", "flamegraph"], default="plop")

-    parser.add_argument("--module", "-m", help="Execute target as a module",

-                        action="store_const", const=True, default=False)

-    parser.add_argument("--mode", help="Interval timer mode to use, see `man 2 setitimer`",

-                        choices=["prof", "real", "virtual"], default="prof")

-    parser.add_argument("--interval", help="Timer interval in seconds", default=0.01, type=float)

-    parser.add_argument("--duration", help="Profiling duration in seconds", default=3600,

-                        type=int)

-    parser.add_argument("--max-stacks", help=("Number of most frequent stacks to store."

-                        " Ignored for Flamegraph output."), type=int, default=50)

-    parser.add_argument("target", help="Module or script to run")

-    parser.add_argument("arguments", nargs=argparse.REMAINDER,

-                        help="Pass-through arguments for the profiled application")

-    args = parser.parse_args()

-    sys.argv = [args.target] + args.arguments

-

-    if args.format == "flamegraph":

-        extension = "flame"

-        formatter = FlamegraphFormatter()

-    elif args.format == "plop":

-        extension = "plop"

-        formatter = PlopFormatter(max_stacks=args.max_stacks)

+    if len(sys.argv) >= 3 and sys.argv[1] == '-m':

+        mode = 'module'

+        module = sys.argv[2]

+        del sys.argv[1:3]

+    elif len(sys.argv) >= 2:

+        mode = "script"

+        script = sys.argv[1]

+        sys.argv = sys.argv[1:]

     else:

-        sys.stderr.write("Unhandled output format: %s" % args.format)

-        sys.stderr.flush()

+        print("usage: python -m plop.collector -m module_to_run")

         sys.exit(1)

+    

 

-    if not os.path.exists('profiles'):

-        os.mkdir('profiles')

-    filename = 'profiles/%s-%s.%s' % (args.target, time.strftime('%Y%m%d-%H%M-%S'),

-                                      extension)

-

-    collector = Collector(mode=args.mode, interval=args.interval)

-    collector.start(duration=args.duration)

+    collector = Collector()

+    collector.start(duration=3600)

     exit_code = 0

     try:

-        if args.module:

+        if mode == "module":

             import runpy

-            runpy.run_module(args.target, run_name="__main__", alter_sys=True)

-        else:

-            with open(args.target) as f:

-                # Execute the script in our namespace instead of creating

-                # a new one so that something that tries to import __main__

-                # (e.g. the unittest module) will see names defined in the

-                # script instead of just those defined in this module.

+            runpy.run_module(module, run_name="__main__", alter_sys=True)

+        elif mode == "script":

+            with open(script) as f:

                 global __file__

-                __file__ = args.target

-                # If __package__ is defined, imports may be incorrectly

-                # interpreted as relative to this module.

-                global __package__

-                del __package__

-                exec f.read() in globals(), globals()

-    except SystemExit, e:

+                __file__ = script

+                # Use globals as our "locals" dictionary so that

+                # something that tries to import __main__ (e.g. the unittest

+                # module) will see the right things.

+                exec(f.read(), globals())

+    except (SystemExit) as e:

         exit_code = e.code

     collector.stop()

-    if collector.samples_taken:

-        formatter.store(collector, filename)

-        print "profile output saved to %s" % filename

-        overhead = float(collector.sample_time) / collector.samples_taken

-        print "overhead was %s per sample (%s%%)" % (

-            overhead, overhead / collector.interval)

-    else:

-        print "no samples collected; program was too fast"

+    collector.filter(50)

+    with open('/tmp/plop.out', 'w') as f:

+        f.write(repr(dict(collector.stack_counts)))

+    print("profile output saved to /tmp/plop.out")

+    overhead = float(collector.sample_time) / collector.samples_taken

+    print("overhead was %s per sample (%s%%)" % (

+        overhead, overhead / collector.interval))

     sys.exit(exit_code)

September 5, 2015

実行速度のボトルネックをplopで見つける

2016/10/6の追記

以降は2016/9/5に書いた記事

使い方

計測

可視化

コード

collector.py

変更点だけなら