diff --git a/docs/en/src/SUMMARY.md b/docs/en/src/SUMMARY.md index cdcd21cf..5814a54b 100644 --- a/docs/en/src/SUMMARY.md +++ b/docs/en/src/SUMMARY.md @@ -30,5 +30,6 @@ - [Actix-web](integrations_to_actix_web.md) - [Advanced topics](advanced_topics.md) - [Custom scalars](custom_scalars.md) + - [Optimizing N+1 queries](dataloader.md) - [Custom extensions](custom_extensions.md) - [Apollo Federation](apollo_federation.md) diff --git a/docs/en/src/dataloader.md b/docs/en/src/dataloader.md new file mode 100644 index 00000000..fc708622 --- /dev/null +++ b/docs/en/src/dataloader.md @@ -0,0 +1,112 @@ +# Optimizing N+1 queries + +Have you noticed that some GraphQL queries end up making hundreds of database queries, often with mostly repeated data? Let's take a look at why this happens and how to fix it. + +## Query Resolution + +Imagine you have a simple query like this: + +```graphql +query { todos { users { name } } } +``` + +and the `User` resolver looks like this: + +```rust +struct User { + id: u64, +} + +#[Object] +impl User { + async fn name(&self, ctx: &Context<'_>) -> Result { + let pool = ctx.data_unchecked::>(); + let (name,): (String,) = sqlx::query_as("SELECT name FROM user WHERE id = $1") + .bind(self.id) + .fetch_one(pool) + .await?; + Ok(name) + } +} +``` + +The query executor will call the `Todos` resolver, which runs `SELECT * FROM todo` and returns N todos. Then, for each +of the todos, it concurrently calls the `User` resolver, which runs `SELECT name FROM user WHERE id = todo.user_id`. 
+ +For example: + +```sql +SELECT id, todo, user_id FROM todo +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +``` + +`SELECT name FROM user WHERE id = $1` is executed many times, and most `Todo` objects belong to the same user, so we +need to optimize this code! + +## Dataloader + +We need to group queries and exclude duplicate queries. `DataLoader` can do this. +[Facebook's DataLoader](https://github.com/facebook/dataloader) provides a request-scoped batching and caching solution. + +The following is an example of using `DataLoader` to optimize queries: + +```rust +use async_graphql::*; +use async_graphql::dataloader::*; +use itertools::Itertools; + +struct UserNameLoader { + pool: sqlx::Pool, +} + +#[async_trait::async_trait] +impl Loader for UserNameLoader { + type Key = u64; + type Value = String; + type Error = sqlx::Error; + + async fn load(&self, keys: HashSet) -> Result, Self::Error> { + let pool = ctx.data_unchecked::>(); + let query = format!("SELECT name FROM user WHERE id IN ({})", keys.iter().join(",")); + Ok(sqlx::query_as(query) + .fetch(&self.pool) + .map_ok(|name: String| name) + .try_collect().await?) + } +} + +struct User { + id: u64, +} + +#[Object] +impl User { + async fn name(&self, ctx: &Context<'_>) -> Result { + let loader = ctx.data_unchecked::>(); + let name: Option = loader.load_one(self.id).await?; + name.ok_or_else(|| "Not found".into()) + } +} +``` + +In the end, only two SQL queries are needed to get the results we want! 
+ +```sql +SELECT id, todo, user_id FROM todo +SELECT name FROM user WHERE id IN (1, 2, 3, 4) +``` diff --git a/docs/zh-CN/src/SUMMARY.md b/docs/zh-CN/src/SUMMARY.md index 448512e6..c643ed30 100644 --- a/docs/zh-CN/src/SUMMARY.md +++ b/docs/zh-CN/src/SUMMARY.md @@ -30,5 +30,6 @@ - [Actix-web](integrations_to_actix_web.md) - [高级主题](advanced_topics.md) - [自定义标量](custom_scalars.md) + - [优化查询(解决N+1问题)](dataloader.md) - [自定义扩展](custom_extensions.md) - [Apollo Federation集成](apollo_federation.md) diff --git a/docs/zh-CN/src/dataloader.md b/docs/zh-CN/src/dataloader.md new file mode 100644 index 00000000..12685f0a --- /dev/null +++ b/docs/zh-CN/src/dataloader.md @@ -0,0 +1,110 @@ +# 优化查询(解决N+1问题) + +您是否注意到某些GraphQL查询需要执行数百个数据库查询,这些查询通常包含重复的数据,让我们来看看为什么以及如何修复它。 + +## 查询解析 + +想象一下,如果您有一个简单的查询,例如: + +```graphql +query { todos { users { name } } } +``` + +实现`User`的resolver代码如下: + +```rust +struct User { + id: u64, +} + +#[Object] +impl User { + async fn name(&self, ctx: &Context<'_>) -> Result { + let pool = ctx.data_unchecked::>(); + let (name,): (String,) = sqlx::query_as("SELECT name FROM user WHERE id = $1") + .bind(self.id) + .fetch_one(pool) + .await?; + Ok(name) + } +} +``` + +执行查询将调用`Todos`的resolver,该resolver执行`SELECT * FROM todo`并返回N个`Todo`对象。然后对每个`Todo`对象同时调用`User`的 +resolver执行`SELECT name FROM user where id = $1`。 + +例如: + +```sql +SELECT id, todo, user_id FROM todo +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +SELECT name FROM user WHERE id = $1 +``` + 
+执行了多次`SELECT name FROM user WHERE id = $1`,并且,大多数`Todo`对象都属于同一个用户,我们需要优化这些代码! + +## Dataloader + +我们需要对查询分组,并且排除重复的查询。`Dataloader`就能完成这个工作,[facebook](https://github.com/facebook/dataloader) 给出了一个请求范围的批处理和缓存解决方案。 + +下面是使用`DataLoader`来优化查询请求的例子: + +```rust +use async_graphql::*; +use async_graphql::dataloader::*; +use itertools::Itertools; + +struct UserNameLoader { + pool: sqlx::Pool, +} + +#[async_trait::async_trait] +impl Loader for UserNameLoader { + type Key = u64; + type Value = String; + type Error = sqlx::Error; + + async fn load(&self, keys: HashSet) -> Result, Self::Error> { + let pool = ctx.data_unchecked::>(); + let query = format!("SELECT name FROM user WHERE id IN ({})", keys.iter().join(",")); + Ok(sqlx::query_as(query) + .fetch(&self.pool) + .map_ok(|name: String| name) + .try_collect().await?) + } +} + +struct User { + id: u64, +} + +#[Object] +impl User { + async fn name(&self, ctx: &Context<'_>) -> Result { + let loader = ctx.data_unchecked::>(); + let name: Option = loader.load_one(self.id).await?; + name.ok_or_else(|| "Not found".into()) + } +} +``` + +最终只需要两个查询语句,就查询出了我们想要的结果! + +```sql +SELECT id, todo, user_id FROM todo +SELECT name FROM user WHERE id IN (1, 2, 3, 4) +``` diff --git a/examples b/examples index 7ca8aad9..bb155d8b 160000 --- a/examples +++ b/examples @@ -1 +1 @@ -Subproject commit 7ca8aad9f056dff072537e27ae2ea6db01831957 +Subproject commit bb155d8b69609a7d75e810bab18bf6ee1281b958 diff --git a/src/dataloader/mod.rs b/src/dataloader/mod.rs index 1936b2e7..5e13e696 100644 --- a/src/dataloader/mod.rs +++ b/src/dataloader/mod.rs @@ -144,13 +144,13 @@ impl DataLoader { pub fn new(loader: T) -> Self { Self { requests: Default::default(), - delay: Duration::from_millis(20), + delay: Duration::from_millis(1), max_batch_size: 1000, loader, } } - /// Specify the delay time for loading data, the default is `20ms`. + /// Specify the delay time for loading data, the default is `1ms`. 
pub fn delay(self, delay: Duration) -> Self { Self { delay, ..self } } diff --git a/src/lib.rs b/src/lib.rs index 5a330922..5e1b2107 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -77,6 +77,7 @@ //! - `url`: Integrate with the [`url` crate](https://crates.io/crates/url). //! - `uuid`: Integrate with the [`uuid` crate](https://crates.io/crates/uuid). //! - `string_number`: Enable the [StringNumber](types/struct.StringNumber.html). +//! - `dataloader`: Support [DataLoader](dataloader/struct.DataLoader.html). //! //! ## Integrations //!