relationship loading technology in SQLAlchemy



准备工作

In [1]:
from sqlalchemy import create_engine, Column, Integer, ForeignKey, String
from sqlalchemy.ext.declarative import as_declarative
from sqlalchemy.orm import relationship, sessionmaker

engine = create_engine('sqlite:///:memory', echo=True)

@as_declarative(bind=engine)
class Base:
    id = Column(Integer, primary_key=True)


class User(Base):
    __tablename__ = 'users'
    
    username = Column(String)

    addresses = relationship('Address', lazy='select')


class Address(Base):
    __tablename__ = 'addresses'
    
    user_id = Column(Integer, ForeignKey('users.id'))
    street = Column(String)
    
    user = relationship('User')


Session = sessionmaker(bind=engine)
session = Session()

if __name__ == '__main__':
    Base.metadata.drop_all()
    Base.metadata.create_all()
    
    # 创建测试数据
    user1 = User(username='小明')
    user2 = User(username='小红')
    address1 = Address(street='皇后大道')
    address1.user = user1
    address2 = Address(street='长安大道')
    address2.user = user2
    
    address3 = Address(street='皇后大道')
    address3.user = user1
    address4 = Address(street='长安大道')
    address4.user = user2

    session.add(address1)
    session.add(address2)
    session.commit()
2020-01-12 22:40:56,473 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2020-01-12 22:40:56,474 INFO sqlalchemy.engine.base.Engine ()
2020-01-12 22:40:56,475 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2020-01-12 22:40:56,475 INFO sqlalchemy.engine.base.Engine ()
2020-01-12 22:40:56,476 INFO sqlalchemy.engine.base.Engine PRAGMA main.table_info("users")
2020-01-12 22:40:56,476 INFO sqlalchemy.engine.base.Engine ()
2020-01-12 22:40:56,478 INFO sqlalchemy.engine.base.Engine PRAGMA main.table_info("addresses")
2020-01-12 22:40:56,478 INFO sqlalchemy.engine.base.Engine ()
2020-01-12 22:40:56,479 INFO sqlalchemy.engine.base.Engine 
DROP TABLE addresses
2020-01-12 22:40:56,480 INFO sqlalchemy.engine.base.Engine ()
2020-01-12 22:40:56,488 INFO sqlalchemy.engine.base.Engine COMMIT
2020-01-12 22:40:56,490 INFO sqlalchemy.engine.base.Engine 
DROP TABLE users
2020-01-12 22:40:56,491 INFO sqlalchemy.engine.base.Engine ()
2020-01-12 22:40:56,497 INFO sqlalchemy.engine.base.Engine COMMIT
2020-01-12 22:40:56,498 INFO sqlalchemy.engine.base.Engine PRAGMA main.table_info("users")
2020-01-12 22:40:56,498 INFO sqlalchemy.engine.base.Engine ()
2020-01-12 22:40:56,499 INFO sqlalchemy.engine.base.Engine PRAGMA temp.table_info("users")
2020-01-12 22:40:56,500 INFO sqlalchemy.engine.base.Engine ()
2020-01-12 22:40:56,500 INFO sqlalchemy.engine.base.Engine PRAGMA main.table_info("addresses")
2020-01-12 22:40:56,501 INFO sqlalchemy.engine.base.Engine ()
2020-01-12 22:40:56,502 INFO sqlalchemy.engine.base.Engine PRAGMA temp.table_info("addresses")
2020-01-12 22:40:56,502 INFO sqlalchemy.engine.base.Engine ()
2020-01-12 22:40:56,503 INFO sqlalchemy.engine.base.Engine 
CREATE TABLE users (
	id INTEGER NOT NULL, 
	username VARCHAR, 
	PRIMARY KEY (id)
)


2020-01-12 22:40:56,504 INFO sqlalchemy.engine.base.Engine ()
2020-01-12 22:40:56,513 INFO sqlalchemy.engine.base.Engine COMMIT
2020-01-12 22:40:56,514 INFO sqlalchemy.engine.base.Engine 
CREATE TABLE addresses (
	id INTEGER NOT NULL, 
	user_id INTEGER, 
	street VARCHAR, 
	PRIMARY KEY (id), 
	FOREIGN KEY(user_id) REFERENCES users (id)
)


2020-01-12 22:40:56,514 INFO sqlalchemy.engine.base.Engine ()
2020-01-12 22:40:56,520 INFO sqlalchemy.engine.base.Engine COMMIT
2020-01-12 22:40:56,525 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2020-01-12 22:40:56,526 INFO sqlalchemy.engine.base.Engine INSERT INTO users (username) VALUES (?)
2020-01-12 22:40:56,527 INFO sqlalchemy.engine.base.Engine ('小明',)
2020-01-12 22:40:56,528 INFO sqlalchemy.engine.base.Engine INSERT INTO users (username) VALUES (?)
2020-01-12 22:40:56,528 INFO sqlalchemy.engine.base.Engine ('小红',)
2020-01-12 22:40:56,529 INFO sqlalchemy.engine.base.Engine INSERT INTO addresses (user_id, street) VALUES (?, ?)
2020-01-12 22:40:56,530 INFO sqlalchemy.engine.base.Engine (1, '皇后大道')
2020-01-12 22:40:56,531 INFO sqlalchemy.engine.base.Engine INSERT INTO addresses (user_id, street) VALUES (?, ?)
2020-01-12 22:40:56,531 INFO sqlalchemy.engine.base.Engine (2, '长安大道')
2020-01-12 22:40:56,532 INFO sqlalchemy.engine.base.Engine COMMIT

relationship的加载机制

lazyload

lazyload的机制是对象查询出来以后, 不会主动去获取和它关联的对象, 而是在获取关联对象时才会去查询, 通过relationship.lazy='select'设置

In [7]:
users = session.query(User).all()
for user in users:
    print(user.addresses)

session.expire_all()

print('-------------------------------------')
    
addresses = session.query(Address).all()
for address in addresses:
    print(address.user)
    
session.expire_all()
2020-01-12 22:43:06,152 INFO sqlalchemy.engine.base.Engine SELECT users.id AS users_id, users.username AS users_username 
FROM users
2020-01-12 22:43:06,153 INFO sqlalchemy.engine.base.Engine ()
2020-01-12 22:43:06,154 INFO sqlalchemy.engine.base.Engine SELECT addresses.id AS addresses_id, addresses.user_id AS addresses_user_id, addresses.street AS addresses_street 
FROM addresses 
WHERE ? = addresses.user_id
2020-01-12 22:43:06,155 INFO sqlalchemy.engine.base.Engine (1,)
[<__main__.Address object at 0x7f9631f7b630>]
2020-01-12 22:43:06,156 INFO sqlalchemy.engine.base.Engine SELECT addresses.id AS addresses_id, addresses.user_id AS addresses_user_id, addresses.street AS addresses_street 
FROM addresses 
WHERE ? = addresses.user_id
2020-01-12 22:43:06,157 INFO sqlalchemy.engine.base.Engine (2,)
[<__main__.Address object at 0x7f9631f7b6d8>]
-------------------------------------
2020-01-12 22:43:06,159 INFO sqlalchemy.engine.base.Engine SELECT addresses.id AS addresses_id, addresses.user_id AS addresses_user_id, addresses.street AS addresses_street 
FROM addresses
2020-01-12 22:43:06,159 INFO sqlalchemy.engine.base.Engine ()
2020-01-12 22:43:06,160 INFO sqlalchemy.engine.base.Engine SELECT users.id AS users_id, users.username AS users_username 
FROM users 
WHERE users.id = ?
2020-01-12 22:43:06,161 INFO sqlalchemy.engine.base.Engine (1,)
<__main__.User object at 0x7f964d2c09b0>
2020-01-12 22:43:06,162 INFO sqlalchemy.engine.base.Engine SELECT users.id AS users_id, users.username AS users_username 
FROM users 
WHERE users.id = ?
2020-01-12 22:43:06,163 INFO sqlalchemy.engine.base.Engine (2,)
<__main__.User object at 0x7f9631fc52b0>

现在有两个用户, 这里会执行三条sql, 第一条sql查询所有的user,

在遍历user的过程中, 当尝试获取address时, 会查询对应的address.

select在数据量大的时候性能很差, 最好是在每次只查一条数据的时候使用.

假设user数量为n, 查询到所有的user和user对应的address, 需要执行n+1条sql.

需要注意的时如果查询address时去获取user, 那么同一个user只会获取一次, 不会多次获取.

这里的address有4条记录, 属于两个user, 这里就只额外查询了两次address对应的user

eagerload

eagerload在获取对象时, 会将它所关联的对象也一并查询出来, eagerload有三种joined, subquery, selectin

In [3]:
from sqlalchemy.orm import joinedload, selectinload, subqueryload

users = session.query(User).options(joinedload(User.addresses)).all()
print('------------------------joined load finished------------------------')

users = session.query(User).options(subqueryload(User.addresses)).all()
print('------------------------subquery load finished------------------------')

users = session.query(User).options(selectinload(User.addresses)).all()
print('------------------------selectin load finished------------------------')

session.expire_all()
2020-01-12 22:40:56,555 INFO sqlalchemy.engine.base.Engine SELECT users.id AS users_id, users.username AS users_username, addresses_1.id AS addresses_1_id, addresses_1.user_id AS addresses_1_user_id, addresses_1.street AS addresses_1_street 
FROM users LEFT OUTER JOIN addresses AS addresses_1 ON users.id = addresses_1.user_id
2020-01-12 22:40:56,556 INFO sqlalchemy.engine.base.Engine ()
------------------------joined load finished------------------------
2020-01-12 22:40:56,558 INFO sqlalchemy.engine.base.Engine SELECT users.id AS users_id, users.username AS users_username 
FROM users
2020-01-12 22:40:56,559 INFO sqlalchemy.engine.base.Engine ()
2020-01-12 22:40:56,561 INFO sqlalchemy.engine.base.Engine SELECT addresses.id AS addresses_id, addresses.user_id AS addresses_user_id, addresses.street AS addresses_street, anon_1.users_id AS anon_1_users_id 
FROM (SELECT users.id AS users_id 
FROM users) AS anon_1 JOIN addresses ON anon_1.users_id = addresses.user_id ORDER BY anon_1.users_id
2020-01-12 22:40:56,562 INFO sqlalchemy.engine.base.Engine ()
------------------------subquery load finished------------------------
2020-01-12 22:40:56,564 INFO sqlalchemy.engine.base.Engine SELECT users.id AS users_id, users.username AS users_username 
FROM users
2020-01-12 22:40:56,564 INFO sqlalchemy.engine.base.Engine ()
2020-01-12 22:40:56,566 INFO sqlalchemy.engine.base.Engine SELECT addresses.user_id AS addresses_user_id, addresses.id AS addresses_id, addresses.street AS addresses_street 
FROM addresses 
WHERE addresses.user_id IN (?, ?) ORDER BY addresses.user_id
2020-01-12 22:40:56,566 INFO sqlalchemy.engine.base.Engine (1, 2)
------------------------selectin load finished------------------------

这里通过options来覆盖默认的lazy设置, 也可以在创建model的时候定义.

这里可以看到获取到users的时候, 对于三种load方式, 都已经加载好了user对应的address

  • joinedload方式只执行了一条sql, 但是因为一个用户会有多个address, 所以需要对重复的记录作合并

  • subquery方式执行了两条sql, 第一条查询所有的user, 第二条通过构造子查询和address作join操作

  • selectin的方式于subquery类似, 区别在于第二条sql, 利用第一步获取的user id构造一个in查询

需要注意的时selectin方式对id的数量有限制, 默认会分成500个作为一批, 分批去做查询

一般来说, 如果是一对一的话, joined的方式最佳, 如果一对多的话使用selectin方式最好, 可以避免一次加载大量的数据.

noload

顾名思义, 这是用来禁止加载关联的对象, 根据行为可以分为noload和raiseload

In [4]:
from sqlalchemy.orm import raiseload, noload



users = session.query(User).options(noload(User.addresses)).all()
for user in users:
    print(user.addresses)
print('------------------------noload finished------------------------')

users = session.query(User).options(raiseload(User.addresses)).all()
for user in users:
    print(user.addresses)
print('------------------------raiseload finished------------------------')

addresses = session.query(Address).options(noload(Address.user)).all()
for address in addresses:
    print(address.user)
print('------------------------noload finished------------------------')

addresses = session.query(Address).options(raiseload(Address.user)).all()
for address in addresses:
    print(address.user)
print('------------------------raiseload finished------------------------')

session.expire_all()
2020-01-12 22:40:56,574 INFO sqlalchemy.engine.base.Engine SELECT users.id AS users_id, users.username AS users_username 
FROM users
2020-01-12 22:40:56,575 INFO sqlalchemy.engine.base.Engine ()
[]
[]
------------------------noload finished------------------------
2020-01-12 22:40:56,577 INFO sqlalchemy.engine.base.Engine SELECT users.id AS users_id, users.username AS users_username 
FROM users
2020-01-12 22:40:56,577 INFO sqlalchemy.engine.base.Engine ()
[]
[]
------------------------raiseload finished------------------------
2020-01-12 22:40:56,578 INFO sqlalchemy.engine.base.Engine SELECT addresses.id AS addresses_id, addresses.user_id AS addresses_user_id, addresses.street AS addresses_street 
FROM addresses
2020-01-12 22:40:56,579 INFO sqlalchemy.engine.base.Engine ()
None
None
------------------------noload finished------------------------
2020-01-12 22:40:56,580 INFO sqlalchemy.engine.base.Engine SELECT addresses.id AS addresses_id, addresses.user_id AS addresses_user_id, addresses.street AS addresses_street 
FROM addresses
2020-01-12 22:40:56,581 INFO sqlalchemy.engine.base.Engine ()
None
None
------------------------raiseload finished------------------------

raiseload使用options的方式时获取属性不会报错, 定义在model中的时候才会报错, 这时获取属性会返回空列表或者None,

Comments