Проблемы с соединением с SQLAlchemy и несколькими процессами
Я использую PostgreSQL и SQLAlchemy в проекте, который состоит из основного процесса, который запускает дочерние процессы. Все эти процессы обращаются к базе данных через SQLAlchemy.
Я испытываю повторяющиеся сбои соединения: первые несколько дочерних процессов работают правильно, но через некоторое время возникает ошибка соединения. Вот MWCE:
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, create_engine
from sqlalchemy.orm import sessionmaker
DB_URL = 'postgresql://user:password@localhost/database'
Base = declarative_base()
class Dummy(Base):
__tablename__ = 'dummies'
id = Column(Integer, primary_key=True)
value = Column(Integer)
engine = None
Session = None
session = None
def init():
global engine, Session, session
engine = create_engine(DB_URL)
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()
def cleanup():
session.close()
engine.dispose()
def target(id):
init()
try:
dummy = session.query(Dummy).get(id)
dummy.value += 1
session.add(dummy)
session.commit()
finally:
cleanup()
def main():
init()
try:
dummy = Dummy(value=1)
session.add(dummy)
session.commit()
p = multiprocessing.Process(target=target, args=(dummy.id,))
p.start()
p.join()
session.refresh(dummy)
assert dummy.value == 2
finally:
cleanup()
if __name__ == '__main__':
i = 1
while True:
print(i)
main()
i += 1
В моей системе (PostgreSQL 9.6, SQLAlchemy 1.1.4, psycopg2 2.6.2, Python 2.7, Ubuntu 14.04) это дает
1
2
3
4
5
6
7
8
9
10
11
Traceback (most recent call last):
File "./fork_test.py", line 64, in <module>
main()
File "./fork_test.py", line 55, in main
session.refresh(dummy)
File "/home/vagrant/latest-sqlalchemy/local/lib/python2.7/site-packages/sqlalchemy/orm/session.py", line 1422, in refresh
only_load_props=attribute_names) is None:
File "/home/vagrant/latest-sqlalchemy/local/lib/python2.7/site-packages/sqlalchemy/orm/loading.py", line 223, in load_on_ident
return q.one()
File "/home/vagrant/latest-sqlalchemy/local/lib/python2.7/site-packages/sqlalchemy/orm/query.py", line 2756, in one
ret = self.one_or_none()
File "/home/vagrant/latest-sqlalchemy/local/lib/python2.7/site-packages/sqlalchemy/orm/query.py", line 2726, in one_or_none
ret = list(self)
File "/home,/vagrant/latest-sqlalchemy/local/lib/python2.7/site-packages/sqlalchemy/orm/query.py", line 2797, in __iter__
return self._execute_and_instances(context)
File "/home/vagrant/latest-sqlalchemy/local/lib/python2.7/site-packages/sqlalchemy/orm/query.py", line 2820, in _execute_and_instances
result = conn.execute(querycontext.statement, self._params)
File "/home/vagrant/latest-sqlalchemy/local/lib/python2.7/site-packages/sqlalchemy/engine/base.py", line 945, in execute
return meth(self, multiparams, params)
File "/home/vagrant/latest-sqlalchemy/local/lib/python2.7/site-packages/sqlalchemy/sql/elements.py", line 263, in _execute_on_connection
return connection._execute_clauseelement(self, multiparams, params)
File "/home/vagrant/latest-sqlalchemy/local/lib/python2.7/site-packages/sqlalchemy/engine/base.py", line 1053, in _execute_clauseelement
compiled_sql, distilled_params
File "/home/vagrant/latest-sqlalchemy/local/lib/python2.7/site-packages/sqlalchemy/engine/base.py", line 1189, in _execute_context
context)
File "/home/vagrant/latest-sqlalchemy/local/lib/python2.7/site-packages/sqlalchemy/engine/base.py", line 1393, in _handle_dbapi_exception
exc_info
File "/home/vagrant/latest-sqlalchemy/local/lib/python2.7/site-packages/sqlalchemy/util/compat.py", line 202, in raise_from_cause
reraise(type(exception), exception, tb=exc_tb, cause=cause)
File "/home/vagrant/latest-sqlalchemy/local/lib/python2.7/site-packages/sqlalchemy/engine/base.py", line 1182, in _execute_context
context)
File "/home/vagrant/latest-sqlalchemy/local/lib/python2.7/site-packages/sqlalchemy/engine/default.py", line 469, in do_execute
cursor.execute(statement, parameters)
sqlalchemy.exc.OperationalError: (psycopg2.OperationalError) server closed the connection unexpectedly
This probably means the server terminated abnormally
before or while processing the request.
[SQL: 'SELECT dummies.id AS dummies_id, dummies.value AS dummies_value \nFROM dummies \nWHERE dummies.id = %(param_1)s'] [parameters: {'param_1': 11074}]
Это повторяется и всегда вылетает на одной и той же итерации.
Я создаю новый движок и сессию после разветвления в соответствии с рекомендациямиДокументация по SQLAlchemy а такжев другом месте, Интересно, что следующий немного другой подход не дает сбоя:
import contextlib
import multiprocessing
import sqlalchemy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, create_engine
from sqlalchemy.orm import sessionmaker
DB_URL = 'postgresql://user:password@localhost/database'
Base = declarative_base()
class Dummy(Base):
__tablename__ = 'dummies'
id = Column(Integer, primary_key=True)
value = Column(Integer)
@contextlib.contextmanager
def get_session():
engine = sqlalchemy.create_engine(DB_URL)
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()
try:
yield session
finally:
session.close()
engine.dispose()
def target(id):
with get_session() as session:
dummy = session.query(Dummy).get(id)
dummy.value += 1
session.add(dummy)
session.commit()
def main():
with get_session() as session:
dummy = Dummy(value=1)
session.add(dummy)
session.commit()
p = multiprocessing.Process(target=target, args=(dummy.id,))
p.start()
p.join()
session.refresh(dummy)
assert dummy.value == 2
if __name__ == '__main__':
i = 1
while True:
print(i)
main()
i += 1
Поскольку исходный код более сложный и его нельзя просто переключить на последнюю версию, я бы хотел понять, почему один из этих способов работает, а другой - нет.
Единственное очевидное отличие состоит в том, что в аварийном коде используются глобальные переменные для движка и сеанса - они передаются через копирование при записи дочерним процессам. Однако, поскольку я сбрасываю их сразу после разветвления, я не понимаю, как это может быть проблемой.
ОбновитьЯ перезапустил две части кода с последней SQLAlchemy (1.1.5), используя как Python 2.7, так и Python 3.4. На обоих результаты в основном как описано выше. Однако в Python 2.7 крах первого фрагмента кода теперь происходит в 13-й итерации (воспроизводимо), в то время как в 3.4 он уже происходит в третьей итерации (также воспроизводимо). Второй фрагмент кода работает без проблем на обеих версиях. Вот обратная связь от 3.4:
1
2
3
Traceback (most recent call last):
File "/home/vagrant/latest-sqlalchemy-3.4/lib/python3.4/site-packages/sqlalchemy/engine/base.py", line 1182, in _execute_context
context)
File "/home/vagrant/latest-sqlalchemy-3.4/lib/python3.4/site-packages/sqlalchemy/engine/default.py", line 470, in do_execute
cursor.execute(statement, parameters)
psycopg2.OperationalError: server closed the connection unexpectedly
This probably means the server terminated abnormally
before or while processing the request.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "fork_test.py", line 64, in <module>
main()
File "fork_test.py", line 55, in main
session.refresh(dummy)
File "/home/vagrant/latest-sqlalchemy-3.4/lib/python3.4/site-packages/sqlalchemy/orm/session.py", line 1424, in refresh
only_load_props=attribute_names) is None:
File "/home/vagrant/latest-sqlalchemy-3.4/lib/python3.4/site-packages/sqlalchemy/orm/loading.py", line 223, in load_on_ident
return q.one()
File "/home/vagrant/latest-sqlalchemy-3.4/lib/python3.4/site-packages/sqlalchemy/orm/query.py", line 2749, in one
ret = self.one_or_none()
File "/home/vagrant/latest-sqlalchemy-3.4/lib/python3.4/site-packages/sqlalchemy/orm/query.py", line 2719, in one_or_none
ret = list(self)
File "/home/vagrant/latest-sqlalchemy-3.4/lib/python3.4/site-packages/sqlalchemy/orm/query.py", line 2790, in __iter__
return self._execute_and_instances(context)
File "/home/vagrant/latest-sqlalchemy-3.4/lib/python3.4/site-packages/sqlalchemy/orm/query.py", line 2813, in _execute_and_instances
result = conn.execute(querycontext.statement, self._params)
File "/home/vagrant/latest-sqlalchemy-3.4/lib/python3.4/site-packages/sqlalchemy/engine/base.py", line 945, in execute
return meth(self, multiparams, params)
File "/home/vagrant/latest-sqlalchemy-3.4/lib/python3.4/site-packages/sqlalchemy/sql/elements.py", line 263, in _execute_on_connection
return connection._execute_clauseelement(self, multiparams, params)
File "/home/vagrant/latest-sqlalchemy-3.4/lib/python3.4/site-packages/sqlalchemy/engine/base.py", line 1053, in _execute_clauseelement
compiled_sql, distilled_params
File "/home/vagrant/latest-sqlalchemy-3.4/lib/python3.4/site-packages/sqlalchemy/engine/base.py", line 1189, in _execute_context
context)
File "/home/vagrant/latest-sqlalchemy-3.4/lib/python3.4/site-packages/sqlalchemy/engine/base.py", line 1393, in _handle_dbapi_exception
exc_info
File "/home/vagrant/latest-sqlalchemy-3.4/lib/python3.4/site-packages/sqlalchemy/util/compat.py", line 203, in raise_from_cause
reraise(type(exception), exception, tb=exc_tb, cause=cause)
File "/home/vagrant/latest-sqlalchemy-3.4/lib/python3.4/site-packages/sqlalchemy/util/compat.py", line 186, in reraise
raise value.with_traceback(tb)
File "/home/vagrant/latest-sqlalchemy-3.4/lib/python3.4/site-packages/sqlalchemy/engine/base.py", line 1182, in _execute_context
context)
File "/home/vagrant/latest-sqlalchemy-3.4/lib/python3.4/site-packages/sqlalchemy/engine/default.py", line 470, in do_execute
cursor.execute(statement, parameters)
sqlalchemy.exc.OperationalError: (psycopg2.OperationalError) server closed the connection unexpectedly
This probably means the server terminated abnormally
before or while processing the request.
[SQL: 'SELECT dummies.id AS dummies_id, dummies.value AS dummies_value \nFROM dummies \nWHERE dummies.id = %(param_1)s'] [parameters: {'param_1': 3397}]
Вот журнал PostgreSQL (он одинаков для 2.7 и 3.4):
2017-01-18 10:59:36 UTC [22429-1] LOG: database system was shut down at 2017-01-18 10:59:35 UTC
2017-01-18 10:59:36 UTC [22429-2] LOG: MultiXact member wraparound protections are now enabled
2017-01-18 10:59:36 UTC [22428-1] LOG: database system is ready to accept connections
2017-01-18 10:59:36 UTC [22433-1] LOG: autovacuum launcher started
2017-01-18 10:59:36 UTC [22435-1] [unknown]@[unknown] LOG: incomplete startup packet
2017-01-18 11:00:10 UTC [22466-1] user@db LOG: SSL error: decryption failed or bad record mac
2017-01-18 11:00:10 UTC [22466-2] user@db LOG: could not receive data from client: Connection reset by peer
(Обратите внимание, что сообщение о неполном загрузочном пакетебезвреден)