Use sessionmaker instead of context block

Essentially there were two bugs:
1) The logging lines that ran before the explicit `Session.begin` caused
   the root error, "A transaction is already begun on this Session.";
   this is resolved by moving the logging inside the explicit session
   block, as intended (see the sketch after this list).
2) We were reusing a Session across cron runs. The architecture was
   intended not to do so, but we didn't have an actual sessionmaker
   available to cron. We probably ought to get rid of scoped_session
   elsewhere too, but baby steps to resolve the immediate issue.
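
For reference, bug 1 reproduces outside the repo entirely. A minimal
sketch, assuming only a throwaway in-memory SQLite engine (the real code
hits postgres through RepeatableTask queries):

from sqlalchemy import create_engine, select
from sqlalchemy.exc import InvalidRequestError
from sqlalchemy.orm import sessionmaker

engine = create_engine("sqlite://", future=True)
factory = sessionmaker(bind=engine, autoflush=False, future=True)

db = factory()
db.execute(select(1))  # any query autobegins a transaction, just as the
                       # logging lines did by touching task attributes
try:
    db.begin()         # explicit begin now fails
except InvalidRequestError as e:
    print(e)           # "A transaction is already begun on this Session."
db.rollback()          # end the implicit transaction...
db.begin()             # ...and the explicit begin succeeds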

In testing, this works correctly with the fix to `db.begin`, and in
tests that deliberately reintroduce the logging bug, the changes to
session creation prevent the stack-trace spam every 15 seconds.
TLSM 2023-05-01 04:30:21 -04:00
parent 46a392ba31
commit 26acca47b9
2 changed files with 51 additions and 51 deletions


@@ -194,11 +194,12 @@ limiter = flask_limiter.Limiter(
 # ...and then after that we can load the database.
 engine: Engine = create_engine(DATABASE_URL)
-db_session: scoped_session = scoped_session(sessionmaker(
+db_session_factory: sessionmaker = sessionmaker(
 	bind=engine,
 	autoflush=False,
 	future=True,
-))
+)
+db_session: scoped_session = scoped_session(db_session_factory)
 # now that we've that, let's add the cache, compression, and mail extensions to our app...
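
Since the factory and the scoped_session now coexist, a standalone
sketch of the intended split may help (placeholder engine and names, not
repo code):

from sqlalchemy import create_engine
from sqlalchemy.orm import scoped_session, sessionmaker

engine = create_engine("sqlite://", future=True)
factory = sessionmaker(bind=engine, autoflush=False, future=True)
request_scoped = scoped_session(factory)  # what the web app keeps using

# The scoped proxy hands back the same Session within a thread; the bare
# factory mints an independent Session per call, which is what cron
# needs so that no state leaks between runs.
assert request_scoped() is request_scoped()
a, b = factory(), factory()
assert a is not b
a.close()
b.close()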


@@ -4,9 +4,9 @@ import time
 from datetime import datetime, timezone
 from typing import Final
-from sqlalchemy.orm import scoped_session, Session
-from files.__main__ import app, db_session
+from sqlalchemy.orm import sessionmaker, Session
+from files.__main__ import app, db_session_factory
 from files.classes.cron.tasks import (DayOfWeek, RepeatableTask,
 	RepeatableTaskRun, ScheduledTaskState)
@@ -41,7 +41,7 @@ def cron_app_worker():
 	logging.info("Starting scheduler worker process")
 	while True:
 		try:
-			_run_tasks(db_session)
+			_run_tasks(db_session_factory)
 		except Exception as e:
 			logging.exception(
 				"An unhandled exception occurred while running tasks",
@@ -77,7 +77,7 @@ def _acquire_lock_exclusive(db: Session, table: str):
 		raise
 
-def _run_tasks(db_session_factory: scoped_session):
+def _run_tasks(db_session_factory: sessionmaker):
 	'''
 	Runs tasks, attempting to guarantee that a task is ran once and only once.
 	This uses postgres to lock the table containing our tasks at key points in
@@ -87,54 +87,53 @@ def _run_tasks(db_session_factory: scoped_session):
 	running task does not lock the entire table for its entire run, which would
 	for example, prevent any statistics about status from being gathered.
 	'''
-	db: Session
-	with db_session_factory() as db:
+	db: Session = db_session_factory()
 	with _acquire_lock_exclusive(db, RepeatableTask.__tablename__):
 		now: datetime = datetime.now(tz=timezone.utc)
 
 		tasks: list[RepeatableTask] = db.query(RepeatableTask).filter(
 			RepeatableTask.enabled == True,
 			RepeatableTask.frequency_day != int(DayOfWeek.NONE),
 			RepeatableTask.run_state != int(ScheduledTaskState.RUNNING),
 			(RepeatableTask.run_time_last <= now)
 				| (RepeatableTask.run_time_last == None),
 		).all()
 
 	# SQLA needs to query again for the inherited object info anyway
 	# so it's fine that objects in the list get expired on txn end.
 	# Prefer more queries to risk of task run duplication.
 	tasks_to_run: list[RepeatableTask] = list(filter(
 		lambda task: task.can_run(now), tasks))
 
 	for task in tasks_to_run:
 		now = datetime.now(tz=timezone.utc)
 		with _acquire_lock_exclusive(db, RepeatableTask.__tablename__):
 			# We need to check for runnability again because we don't mutex
 			# the RepeatableTask.run_state until now.
 			if not task.can_run(now):
 				continue
 			task.run_time_last = now
 			task.run_state_enum = ScheduledTaskState.RUNNING
 
 		# This *must* happen before we start doing db queries, including sqlalchemy db queries
 		db.begin()
 		task_debug_identifier = f"(ID {task.id}:{task.label})"
 		logging.info(f"Running task {task_debug_identifier}")
 
 		run: RepeatableTaskRun = task.run(db, task.run_time_last_or_created_utc)
 
 		if run.exception:
 			# TODO: collect errors somewhere other than just here and in the
 			# task run object itself (see #220).
 			logging.exception(
 				f"Exception running task {task_debug_identifier}",
 				exc_info=run.exception
 			)
 			db.rollback()
 		else:
 			db.commit()
 			logging.info(f"Finished task {task_debug_identifier}")
 
 		with _acquire_lock_exclusive(db, RepeatableTask.__tablename__):
 			task.run_state_enum = ScheduledTaskState.WAITING
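
_acquire_lock_exclusive itself is out of frame except for its signature
in the @@ -77 hunk above. A plausible shape for such a helper, offered
as an assumption rather than the repo's actual implementation:

import logging
from contextlib import contextmanager

from sqlalchemy import text
from sqlalchemy.orm import Session

@contextmanager
def _acquire_lock_exclusive(db: Session, table: str):
    # LOCK TABLE only persists until its transaction ends, so the helper
    # wraps one transaction around the caller's critical section.
    db.begin()
    try:
        db.execute(text(f"LOCK TABLE {table} IN ACCESS EXCLUSIVE MODE"))
        yield
        db.commit()
    except Exception:
        logging.exception(f"Error in transaction locking {table}")
        db.rollback()  # then re-raise, matching the bare `raise` in the hunk context
        raise

Taking the lock inside its own short transaction is what lets a
long-running task release the table while it works, which is exactly
what the docstring above promises.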