Merge branch 'dataloader'
This commit is contained in:
commit
4a843f5f60
@ -30,5 +30,6 @@
|
|||||||
- [Actix-web](integrations_to_actix_web.md)
|
- [Actix-web](integrations_to_actix_web.md)
|
||||||
- [Advanced topics](advanced_topics.md)
|
- [Advanced topics](advanced_topics.md)
|
||||||
- [Custom scalars](custom_scalars.md)
|
- [Custom scalars](custom_scalars.md)
|
||||||
|
- [Optimizing N+1 queries](dataloader.md)
|
||||||
- [Custom extensions](custom_extensions.md)
|
- [Custom extensions](custom_extensions.md)
|
||||||
- [Apollo Federation](apollo_federation.md)
|
- [Apollo Federation](apollo_federation.md)
|
||||||
|
112
docs/en/src/dataloader.md
Normal file
112
docs/en/src/dataloader.md
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
# Optimizing N+1 queries
|
||||||
|
|
||||||
|
Have you noticed that some GraphQL queries can end up making hundreds of database queries, often with mostly repeated data? Let's take a look at why this happens and how to fix it.
|
||||||
|
|
||||||
|
## Query Resolution
|
||||||
|
|
||||||
|
Imagine if you have a simple query like this:
|
||||||
|
|
||||||
|
```graphql
|
||||||
|
query { todos { users { name } } }
|
||||||
|
```
|
||||||
|
|
||||||
|
and `User` resolver is like this:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
struct User {
|
||||||
|
id: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[Object]
|
||||||
|
impl User {
|
||||||
|
async fn name(&self, ctx: &Context<'_>) -> Result<String> {
|
||||||
|
let pool = ctx.data_unchecked::<Pool<Postgres>>();
|
||||||
|
let (name,): (String,) = sqlx::query_as("SELECT name FROM user WHERE id = $1")
|
||||||
|
.bind(self.id)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await?;
|
||||||
|
Ok(name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The query executor will call the `Todos` resolver, which executes `SELECT * FROM todo` and returns N todos. Then for each
|
||||||
|
of the todos, concurrently, call the `User` resolver, which runs `SELECT name FROM user WHERE id = todo.user_id`.
|
||||||
|
|
||||||
|
eg:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT id, todo, user_id FROM todo
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
```
|
||||||
|
|
||||||
|
After executing `SELECT name FROM user WHERE id = $1` many times — and since most `Todo` objects belong to the same user — we
|
||||||
|
need to optimize this code!
|
||||||
|
|
||||||
|
## Dataloader
|
||||||
|
|
||||||
|
We need to group queries and exclude duplicate queries. `Dataloader` can do this.
|
||||||
|
[Facebook's dataloader](https://github.com/facebook/dataloader) provides a request-scoped batching and caching solution.
|
||||||
|
|
||||||
|
The following is an example of using `DataLoader` to optimize queries:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
use async_graphql::*;
|
||||||
|
use async_graphql::dataloader::*;
|
||||||
|
use itertools::Itertools;
|
||||||
|
|
||||||
|
struct UserNameLoader {
|
||||||
|
pool: sqlx::Pool<Postgres>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
impl Loader for UserNameLoader {
|
||||||
|
type Key = u64;
|
||||||
|
type Value = String;
|
||||||
|
type Error = sqlx::Error;
|
||||||
|
|
||||||
|
async fn load(&self, keys: HashSet<Self::Key>) -> Result<HashMap<Self::Key, Self::Value>, Self::Error> {
|
||||||
|
let pool = ctx.data_unchecked::<Pool<Postgres>>();
|
||||||
|
let query = format!("SELECT name FROM user WHERE id IN ({})", keys.iter().join(","));
|
||||||
|
Ok(sqlx::query_as(query)
|
||||||
|
.fetch(&self.pool)
|
||||||
|
.map_ok(|name: String| name)
|
||||||
|
.try_collect().await?)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct User {
|
||||||
|
id: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[Object]
|
||||||
|
impl User {
|
||||||
|
async fn name(&self, ctx: &Context<'_>) -> Result<String> {
|
||||||
|
let loader = ctx.data_unchecked::<DataLoader<UserNameLoader>>();
|
||||||
|
let name: Option<String> = loader.load_one(self.id).await?;
|
||||||
|
name.ok_or_else(|| "Not found".into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
In the end, only two SQLs are needed to query the results we want!
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT id, todo, user_id FROM todo
|
||||||
|
SELECT name FROM user WHERE id IN (1, 2, 3, 4)
|
||||||
|
```
|
@ -30,5 +30,6 @@
|
|||||||
- [Actix-web](integrations_to_actix_web.md)
|
- [Actix-web](integrations_to_actix_web.md)
|
||||||
- [高级主题](advanced_topics.md)
|
- [高级主题](advanced_topics.md)
|
||||||
- [自定义标量](custom_scalars.md)
|
- [自定义标量](custom_scalars.md)
|
||||||
|
- [优化查询(解决N+1问题)](dataloader.md)
|
||||||
- [自定义扩展](custom_extensions.md)
|
- [自定义扩展](custom_extensions.md)
|
||||||
- [Apollo Federation集成](apollo_federation.md)
|
- [Apollo Federation集成](apollo_federation.md)
|
||||||
|
110
docs/zh-CN/src/dataloader.md
Normal file
110
docs/zh-CN/src/dataloader.md
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
# 优化查询(解决N+1问题)
|
||||||
|
|
||||||
|
您是否注意到某些GraphQL查询需要执行数百个数据库查询,这些查询通常包含重复的数据,让我们来看看为什么以及如何修复它。
|
||||||
|
|
||||||
|
## 查询解析
|
||||||
|
|
||||||
|
想象一下,如果您有一个简单的查询,例如:
|
||||||
|
|
||||||
|
```graphql
|
||||||
|
query { todos { users { name } } }
|
||||||
|
```
|
||||||
|
|
||||||
|
实现`User`的resolver代码如下:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
struct User {
|
||||||
|
id: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[Object]
|
||||||
|
impl User {
|
||||||
|
async fn name(&self, ctx: &Context<'_>) -> Result<String> {
|
||||||
|
let pool = ctx.data_unchecked::<Pool<Postgres>>();
|
||||||
|
let (name,): (String,) = sqlx::query_as("SELECT name FROM user WHERE id = $1")
|
||||||
|
.bind(self.id)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await?;
|
||||||
|
Ok(name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
执行查询将调用`Todos`的resolver,该resolver执行`SELECT * FROM todo`并返回N个`Todo`对象。然后对每个`Todo`对象同时调用`User`的
|
||||||
|
resolver执行`SELECT name FROM user where id = $1`。
|
||||||
|
|
||||||
|
例如:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT id, todo, user_id FROM todo
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
SELECT name FROM user WHERE id = $1
|
||||||
|
```
|
||||||
|
|
||||||
|
执行了多次`SELECT name FROM user WHERE id = $1`,并且,大多数`Todo`对象都属于同一个用户,我们需要优化这些代码!
|
||||||
|
|
||||||
|
## Dataloader
|
||||||
|
|
||||||
|
我们需要对查询分组,并且排除重复的查询。`Dataloader`就能完成这个工作,[facebook](https://github.com/facebook/dataloader) 给出了一个请求范围的批处理和缓存解决方案。
|
||||||
|
|
||||||
|
下面是使用`DataLoader`来优化查询请求的例子:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
use async_graphql::*;
|
||||||
|
use async_graphql::dataloader::*;
|
||||||
|
use itertools::Itertools;
|
||||||
|
|
||||||
|
struct UserNameLoader {
|
||||||
|
pool: sqlx::Pool<Postgres>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
impl Loader for UserNameLoader {
|
||||||
|
type Key = u64;
|
||||||
|
type Value = String;
|
||||||
|
type Error = sqlx::Error;
|
||||||
|
|
||||||
|
async fn load(&self, keys: HashSet<Self::Key>) -> Result<HashMap<Self::Key, Self::Value>, Self::Error> {
|
||||||
|
let pool = ctx.data_unchecked::<Pool<Postgres>>();
|
||||||
|
let query = format!("SELECT name FROM user WHERE id IN ({})", keys.iter().join(","));
|
||||||
|
Ok(sqlx::query_as(query)
|
||||||
|
.fetch(&self.pool)
|
||||||
|
.map_ok(|name: String| name)
|
||||||
|
.try_collect().await?)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct User {
|
||||||
|
id: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[Object]
|
||||||
|
impl User {
|
||||||
|
async fn name(&self, ctx: &Context<'_>) -> Result<String> {
|
||||||
|
let loader = ctx.data_unchecked::<DataLoader<UserNameLoader>>();
|
||||||
|
let name: Option<String> = loader.load_one(self.id).await?;
|
||||||
|
name.ok_or_else(|| "Not found".into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
最终只需要两个查询语句,就查询出了我们想要的结果!
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT id, todo, user_id FROM todo
|
||||||
|
SELECT name FROM user WHERE id IN (1, 2, 3, 4)
|
||||||
|
```
|
2
examples
2
examples
@ -1 +1 @@
|
|||||||
Subproject commit 7ca8aad9f056dff072537e27ae2ea6db01831957
|
Subproject commit bb155d8b69609a7d75e810bab18bf6ee1281b958
|
@ -144,13 +144,13 @@ impl<T: Loader> DataLoader<T> {
|
|||||||
pub fn new(loader: T) -> Self {
|
pub fn new(loader: T) -> Self {
|
||||||
Self {
|
Self {
|
||||||
requests: Default::default(),
|
requests: Default::default(),
|
||||||
delay: Duration::from_millis(20),
|
delay: Duration::from_millis(1),
|
||||||
max_batch_size: 1000,
|
max_batch_size: 1000,
|
||||||
loader,
|
loader,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Specify the delay time for loading data, the default is `20ms`.
|
/// Specify the delay time for loading data, the default is `1ms`.
|
||||||
pub fn delay(self, delay: Duration) -> Self {
|
pub fn delay(self, delay: Duration) -> Self {
|
||||||
Self { delay, ..self }
|
Self { delay, ..self }
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user