Rig up a query-frequency analyzer with the most commonly seen callstack, including Jinja demangling.

This commit is contained in:
Ben Rog-Wilhelm 2022-11-10 19:17:30 -06:00 committed by Ben Rog-Wilhelm
parent 6b55cc1f5b
commit 1437bc3092
6 changed files with 69 additions and 7 deletions

View file

@ -26,7 +26,10 @@ CMD [ "/usr/bin/supervisord", "-c", "/etc/supervisord.conf" ]
################################################################### ###################################################################
# Dev container # Dev container
FROM release AS dev FROM release AS dev
# we don't actually do anything different yet
# Install our tweaked sqlalchemy-easy-profile
COPY thirdparty/sqlalchemy-easy-profile sqlalchemy-easy-profile
RUN cd sqlalchemy-easy-profile && python3 setup.py install
################################################################### ###################################################################

View file

@ -48,6 +48,42 @@ if environ.get("FLASK_PROFILER_ENDPOINT"):
profiler = flask_profiler.Profiler() profiler = flask_profiler.Profiler()
profiler.init_app(app) profiler.init_app(app)
try:
from easy_profile import EasyProfileMiddleware
from jinja2.utils import internal_code
import inspect as inspectlib
import linecache
def jinja_unmangle_stacktrace():
rewritten_frames = []
for record in inspectlib.stack():
# Skip jinja internalcode frames
if record.frame.f_code in internal_code:
continue
filename = record.frame.f_code.co_filename
lineno = record.frame.f_lineno
name = record.frame.f_code.co_name
template = record.frame.f_globals.get("__jinja_template__")
if template is not None:
lineno = template.get_corresponding_lineno(lineno)
line = linecache.getline(filename, lineno).strip()
rewritten_frames.append(f' File "{filename}", line {lineno}, {name}\n {line}\n')
return "".join(rewritten_frames)
app.wsgi_app = EasyProfileMiddleware(
app.wsgi_app,
stack_callback = jinja_unmangle_stacktrace)
except ModuleNotFoundError:
# failed to import, just keep on going
pass
app.config["SITE_ID"]=environ.get("SITE_ID").strip() app.config["SITE_ID"]=environ.get("SITE_ID").strip()
app.config["SITE_TITLE"]=environ.get("SITE_TITLE").strip() app.config["SITE_TITLE"]=environ.get("SITE_TITLE").strip()
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

View file

@ -44,3 +44,8 @@ pytest = "*"
[build-system] [build-system]
requires = ["poetry-core>=1.0.0"] requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api" build-backend = "poetry.core.masonry.api"
[tool.pytest.ini_options]
norecursedirs = [
"thirdparty/*",
]

View file

@ -1,4 +1,5 @@
import re import re
import traceback
from .profiler import SessionProfiler from .profiler import SessionProfiler
from .reporters import Reporter, StreamReporter from .reporters import Reporter, StreamReporter
@ -23,7 +24,8 @@ class EasyProfileMiddleware(object):
reporter=None, reporter=None,
exclude_path=None, exclude_path=None,
min_time=0, min_time=0,
min_query_count=1): min_query_count=1,
stack_callback=None):
if reporter: if reporter:
if not isinstance(reporter, Reporter): if not isinstance(reporter, Reporter):
@ -37,9 +39,10 @@ class EasyProfileMiddleware(object):
self.exclude_path = exclude_path or [] self.exclude_path = exclude_path or []
self.min_time = min_time self.min_time = min_time
self.min_query_count = min_query_count self.min_query_count = min_query_count
self.stack_callback = stack_callback
def __call__(self, environ, start_response): def __call__(self, environ, start_response):
profiler = SessionProfiler(self.engine) profiler = SessionProfiler(self.engine, stack_callback=self.stack_callback)
path = environ.get("PATH_INFO", "") path = environ.get("PATH_INFO", "")
if not self._ignore_request(path): if not self._ignore_request(path):
method = environ.get("REQUEST_METHOD") method = environ.get("REQUEST_METHOD")
@ -61,3 +64,4 @@ class EasyProfileMiddleware(object):
if (stats["total"] >= self.min_query_count and if (stats["total"] >= self.min_query_count and
stats["duration"] >= self.min_time): stats["duration"] >= self.min_time):
self.reporter.report(path, stats) self.reporter.report(path, stats)

View file

@ -5,6 +5,8 @@ from queue import Queue
import re import re
import sys import sys
import time import time
import traceback
import pprint
from sqlalchemy import event from sqlalchemy import event
from sqlalchemy.engine.base import Engine from sqlalchemy.engine.base import Engine
@ -32,7 +34,7 @@ def _get_object_name(obj):
_DebugQuery = namedtuple( _DebugQuery = namedtuple(
"_DebugQuery", "statement,parameters,start_time,end_time" "_DebugQuery", "statement,parameters,start_time,end_time,callstack"
) )
@ -57,7 +59,7 @@ class SessionProfiler:
_before = "before_cursor_execute" _before = "before_cursor_execute"
_after = "after_cursor_execute" _after = "after_cursor_execute"
def __init__(self, engine=None): def __init__(self, engine=None, stack_callback=None):
if engine is None: if engine is None:
self.engine = Engine self.engine = Engine
self.db_name = "default" self.db_name = "default"
@ -70,6 +72,8 @@ class SessionProfiler:
self._stats = None self._stats = None
self._stack_callback = stack_callback or self._stack_callback_default
def __enter__(self): def __enter__(self):
self.begin() self.begin()
@ -160,6 +164,9 @@ class SessionProfiler:
self._stats["duration"] += query.duration self._stats["duration"] += query.duration
duplicates = self._stats["duplicates"].get(query.statement, -1) duplicates = self._stats["duplicates"].get(query.statement, -1)
self._stats["duplicates"][query.statement] = duplicates + 1 self._stats["duplicates"][query.statement] = duplicates + 1
if query.statement not in self._stats["callstacks"]:
self._stats["callstacks"][query.statement] = Counter()
self._stats["callstacks"][query.statement].update({query.callstack: 1})
return self._stats return self._stats
@ -174,6 +181,7 @@ class SessionProfiler:
self._stats["duration"] = 0 self._stats["duration"] = 0
self._stats["call_stack"] = [] self._stats["call_stack"] = []
self._stats["duplicates"] = Counter() self._stats["duplicates"] = Counter()
self._stats["callstacks"] = {}
def _before_cursor_execute(self, conn, cursor, statement, parameters, def _before_cursor_execute(self, conn, cursor, statement, parameters,
context, executemany): context, executemany):
@ -182,5 +190,8 @@ class SessionProfiler:
def _after_cursor_execute(self, conn, cursor, statement, parameters, def _after_cursor_execute(self, conn, cursor, statement, parameters,
context, executemany): context, executemany):
self.queries.put(DebugQuery( self.queries.put(DebugQuery(
statement, parameters, context._query_start_time, _timer() statement, parameters, context._query_start_time, _timer(), self._stack_callback()
)) ))
def _stack_callback_default(self):
return ''.join(traceback.format_stack())

View file

@ -86,6 +86,8 @@ class StreamReporter(Reporter):
summary = "Total queries: {0} in {1:.3}s".format(total, duration) summary = "Total queries: {0} in {1:.3}s".format(total, duration)
output += self._info_line("\n{0}\n".format(summary), total) output += self._info_line("\n{0}\n".format(summary), total)
callstacks = stats["callstacks"]
# Display duplicated sql statements. # Display duplicated sql statements.
# #
# Get top counters were value greater than 1 and write to # Get top counters were value greater than 1 and write to
@ -95,11 +97,12 @@ class StreamReporter(Reporter):
for statement, count in most_common: for statement, count in most_common:
if count < 1: if count < 1:
continue continue
callstack = callstacks[statement].most_common(1)
# Wrap SQL statement and returning a list of wrapped lines # Wrap SQL statement and returning a list of wrapped lines
statement = sqlparse.format( statement = sqlparse.format(
statement, reindent=True, keyword_case="upper" statement, reindent=True, keyword_case="upper"
) )
text = "\nRepeated {0} times:\n{1}\n".format(count + 1, statement) text = "\nRepeated {0} times:\n{1}\nMost common callstack {2} times:\n{3}\n".format(count + 1, statement, callstack[0][1], callstack[0][0])
output += self._info_line(text, count) output += self._info_line(text, count)
self._file.write(output) self._file.write(output)