Open
Description
During development of our product (Starlette + Ariadne) I noticed significant performance degradation when GraphQL responses became very long (1000+ entities in a list). After profiling and drilling into the issue, I pinpointed it to async
resolvers. Whenever a resolver is async and is called many times (100,000), you can see significant slowdowns of 4x-7x, even if the resolver does nothing asynchronous.
The question I ended up with: is this a limitation of Python asyncio, or of how the results are gathered for async fields in graphql-core's execute?
Any insight/help is greatly appreciated, as we really need more performance and changing to sync is not really an option (nor is rewriting it in another language) 😭
import asyncio
from dataclasses import dataclass
from graphql import (
GraphQLField,
GraphQLList,
GraphQLObjectType,
GraphQLSchema,
GraphQLString,
graphql,
)
# Benchmark size: how many Person objects the ``people`` resolver returns.
# Reported timings on a MacBook Pro M1 Max: around 44 seconds when the query
# includes the async field, and "only" 7 seconds when the async field is
# removed from the query.
TOTAL_PEOPLE = 1_000_000
@dataclass
class Person:
    """Plain record used as the source object for the ``Person`` GraphQL type."""

    # Attribute names are camelCase on purpose: they match the GraphQL field
    # names, which have no explicit resolver and read these attributes.
    firstName: str
    lastName: str
def resolve_people(*_):
    """Build the full benchmark list of ``TOTAL_PEOPLE`` Person objects.

    All positional resolver arguments are ignored.
    """
    return [Person(f"Jane{index}", "Do") for index in range(TOTAL_PEOPLE)]
def resolve_fullname(person: Person, *_):
    """Return the person's first and last name joined by a single space.

    This is the synchronous baseline resolver; extra resolver arguments are
    ignored.
    """
    first, last = person.firstName, person.lastName
    return f"{first} {last}"
async def async_resolve_fullname(person: Person, *_):
    """Coroutine counterpart of the synchronous full-name resolver.

    It computes the same string and never awaits anything, so the only
    difference from the sync version is that it is declared ``async``.
    Extra resolver arguments are ignored.
    """
    first, last = person.firstName, person.lastName
    return f"{first} {last}"
# GraphQL object type for a person. ``firstName`` and ``lastName`` have no
# explicit resolver; ``fullName`` uses the plain function resolver and
# ``asyncFullName`` uses the coroutine resolver that computes the same value.
PersonType = GraphQLObjectType(
    name="Person",
    fields={
        "firstName": GraphQLField(GraphQLString),
        "lastName": GraphQLField(GraphQLString),
        "fullName": GraphQLField(GraphQLString, resolve=resolve_fullname),
        "asyncFullName": GraphQLField(
            GraphQLString,
            resolve=async_resolve_fullname,
        ),
    },
)
# Schema with a single root query field, ``people``, which returns a list of
# ``PersonType`` objects produced by ``resolve_people``.
schema = GraphQLSchema(
    query=GraphQLObjectType(
        "RootQueryType",
        {"people": GraphQLField(GraphQLList(PersonType), resolve=resolve_people)},
    ),
)
async def main():
    """Run the benchmark query against ``schema`` and sanity-check the result.

    The query selects every field of every person, including the
    coroutine-resolved ``asyncFullName`` field that the benchmark is about.

    Raises:
        RuntimeError: if the execution result reports any errors.
        AssertionError: if the number of returned people is not
            ``TOTAL_PEOPLE``.
    """
    result = await graphql(
        schema,
        """#graphql
query {
people {
firstName
lastName
fullName
asyncFullName # THIS IS THE SLOW PART
}
}
""",
    )
    # Surface execution errors explicitly. Without this check a failed query
    # would only show up as an opaque failure when indexing ``result.data``
    # below (presumably ``data`` is None or partial in that case).
    if result.errors:
        raise RuntimeError(f"GraphQL execution failed: {result.errors}")
    assert len(result.data["people"]) == TOTAL_PEOPLE
def run(callable, is_profile: bool = False):
    """Run the coroutine function ``callable`` to completion with asyncio.

    Args:
        callable: Zero-argument function returning a coroutine; it is called
            once and the coroutine is passed to ``asyncio.run``.
        is_profile: When true, the run is profiled with yappi and the stats
            are written to ``callgrind.func_stats`` (callgrind format) and
            ``func_stats.txt`` (text table) in the current directory.

    Returns:
        Whatever the coroutine returns, in both modes.
    """
    if not is_profile:
        return asyncio.run(callable())

    # Imported lazily so yappi is only required when profiling is requested.
    import yappi

    yappi.set_clock_type("WALL")
    with yappi.run():
        # Keep the result so profiling mode returns the same value as the
        # plain mode instead of silently dropping it.
        result = asyncio.run(callable())

    func_stats = yappi.get_func_stats()
    func_stats.save("callgrind.func_stats", "callgrind")
    with open("func_stats.txt", "w", encoding="utf-8") as file:
        func_stats.print_all(
            file,
            {
                # We increase the column widths significantly
                0: ("name", 120),
                1: ("ncall", 10),
                2: ("tsub", 12),
                3: ("ttot", 12),
                4: ("tavg", 12),
            },
        )
    return result
if __name__ == "__main__":
    # Script entry point: run the benchmark once without profiling.
    # Pass is_profile=True instead to collect yappi statistics for the run.
    run(main, is_profile=False)
Versions:
- macOS: 13.2
- python 3.11
- graphql-core 3.2.3
- yappi 1.4.0