Site icon ashishb.net

API backend should use dataloaders

Data Loaders allow transparent batching of requests to a data provider (e.g. database). More often than not, this leads to reduced latency and better performance without forcing an explicit batching of requests for the API users, for example, your frontend developers.

Many programmers associate data loaders with GraphQL and its N+1 query problem. I believe data loaders are a great idea any time you are building an API backend. Let me illustrate the concept with a simple example. And while I am using Go as an example, data loader implementations are available in many languages.

Let’s take a toy example of a database service that will take 50 ± 20 ms to respond to a single request. And for the sake of simplicity let’s assume it will take twice as long to respond to a batch of up to 10 requests. All these numbers are configurable in the code below for simulation. With some experimentation, you can figure out the approximate range for your database service as well. As a concrete example, assume that given a student ID, the service returns the student’s score. And to add an error flow to it, the service returns an error when the student ID is zero or negative.

Let’s assume there is a DBService

type DBService interface {
    GetScore(studentID int) Resul
    // For batched results
    GetScores(studentID []int) []Result
}

And we will implement some boilerplate web service that uses this without a data loader

// Code without dataloader
package main

import (
  "context"
    "fmt"
    "math/rand"
    "net/http"
    "strconv"
    "time"
)

// DBService describes a student score lookup service that offers both a
// single-ID call and a batched call.
type DBService interface {
    // GetScore returns the Result for one student ID.
    GetScore(ctx context.Context, studentID int) Result
    // GetScores returns the Results for a batch of IDs. Despite its singular
    // name, studentID carries the whole batch. The implementation in this
    // file returns one Result per ID, in input order.
    GetScores(ctx context.Context, studentID []int) []Result
}

// Result is the outcome of a score lookup: either a score or an error.
type Result struct {
    // Only one of these will be non-nil
    Score *int
    Err   error
}

// FakeDBService is a field-less stand-in for a real database service.
// Its methods sleep to simulate query latency.
type FakeDBService struct {
}

// Timing knobs for the simulated database latency: a fixed base delay plus
// a random extra that is always shorter than _delayVariance.
const (
    _baseDelay     = 50 * time.Millisecond
    _delayVariance = 20 * time.Millisecond
)

// GetScore simulates a single-row lookup. Non-positive IDs are rejected
// immediately, without any delay. Every other ID sleeps for _baseDelay plus
// a random extra below _delayVariance and then yields studentID squared.
// The context is ignored.
func (f FakeDBService) GetScore(_ context.Context, studentID int) Result {
    if studentID <= 0 {
        return Result{Err: fmt.Errorf("invalid student ID")}
    }

    // Pretend the database is busy for a little while.
    jitter := time.Duration(rand.Int63n(int64(_delayVariance)))
    time.Sleep(_baseDelay + jitter)

    // The score is a pure function of the ID so the demo is reproducible.
    squared := studentID * studentID
    return Result{Score: &squared}
}

// handler reads the student_id query parameter, looks the score up through
// the DB service and writes it to the response body as a decimal number.
//
// It responds with 400 when student_id is missing or not an integer, and
// with 500 when the lookup itself returns an error.
func handler(writer http.ResponseWriter, request *http.Request) {
    fakeDBService := &FakeDBService{}
    // read student ID from request
    studentIDStr := request.URL.Query().Get("student_id")
    if studentIDStr == "" {
        writer.WriteHeader(http.StatusBadRequest)
        return
    }
    studentID, err := strconv.Atoi(studentIDStr)
    if err != nil {
        writer.WriteHeader(http.StatusBadRequest)
        return
    }
    result := fakeDBService.GetScore(request.Context(), studentID)
    if result.Err != nil {
        writer.WriteHeader(http.StatusInternalServerError)
        return
    }
    // write score to response
    _, err = writer.Write([]byte(strconv.Itoa(*result.Score)))
    if err != nil {
        // Write has already sent the implicit 200 status line, so a
        // WriteHeader call here would be superfluous and could not change
        // the status. Report the failure instead.
        fmt.Printf("writing score response: %v\n", err)
    }
}

// main registers handler under /score on the default mux and serves HTTP on
// port 8080. If the server returns an error, it is printed.
func main() {
    http.HandleFunc("/score", handler)

    fmt.Printf("Starting server on port 8080...\n")
    if serveErr := http.ListenAndServe(":8080", nil); serveErr != nil {
        fmt.Printf("ListenAndServe: %v\n", serveErr)
    }
}

Now, our goal is to add data loaders here. Let’s use Go modules to pull in a data loader library.

# In the same dir containing test_main.go
# Create a go.mod for a module named example.com/m
$ go mod init example.com/m
$ go mod tidy
# Add the data loader library used by the code below
$ go get -u github.com/graph-gophers/dataloader

Now, let’s add the fake/demo batch reader

// GetScores is the batched counterpart of GetScore. It logs the requested
// IDs, produces one Result per ID in input order (an error for non-positive
// IDs, the squared ID otherwise), and then sleeps once for the whole batch:
// twice the single-request delay. The context is ignored.
func (f FakeDBService) GetScores(_ context.Context, studentIDs []int) []Result {
    fmt.Printf("GetScores called with student IDs: %+v\n", studentIDs)

    scores := make([]Result, len(studentIDs))
    for i, id := range studentIDs {
        if id <= 0 {
            scores[i] = Result{Err: fmt.Errorf("invalid student ID")}
            continue
        }
        // The score is a pure function of the ID so the demo is reproducible.
        squared := id * id
        scores[i] = Result{Score: &squared}
    }

    // A batch costs double the latency of a single lookup.
    jitter := time.Duration(rand.Int63n(int64(_delayVariance)))
    time.Sleep(2 * (_baseDelay + jitter))
    return scores
}

And add a batch loader (remember to add "github.com/graph-gophers/dataloader" to the import block)

// FakeDBServiceWithBatching wraps a DBService and routes single-ID lookups
// through a dataloader so that they reach the wrapped service as batches.
type FakeDBServiceWithBatching struct {
    // dbService is the wrapped service that performs the actual lookups.
    dbService  DBService
    // dataloader collects GetScore calls and hands them to the batch function.
    dataloader *dataloader.Loader
}

// newDBServiceWithBatching wraps dbService in a dataloader-backed service.
// All batching behaviour is configured here: the loader is given a wait of
// 10 ms, a batch capacity of 10, and clears its cache on every batch.
func newDBServiceWithBatching(dbService DBService) *FakeDBServiceWithBatching {
    // The loader calls this once per batch, with the Context that was
    // passed to the first Load call.
    loadBatch := func(ctx context.Context, keys dataloader.Keys) []*dataloader.Result {
        scores := dbService.GetScores(ctx, getStudentIDs(keys))
        wrapped := make([]*dataloader.Result, len(scores))
        for i, score := range scores {
            wrapped[i] = &dataloader.Result{Data: score, Error: score.Err}
        }
        return wrapped
    }

    loader := dataloader.NewBatchedLoader(
        loadBatch,
        dataloader.WithClearCacheOnBatch(),
        dataloader.WithWait(10*time.Millisecond),
        dataloader.WithBatchCapacity(10),
    )
    return &FakeDBServiceWithBatching{dbService: dbService, dataloader: loader}
}

// GetScore looks up one student through the dataloader, which transparently
// merges concurrent calls into a single GetScores call on the wrapped
// DBService. A batch holds at most 10 requests (dataloader.WithBatchCapacity)
// and is collected for at most 10 ms (dataloader.WithWait).
func (f FakeDBServiceWithBatching) GetScore(ctx context.Context, studentID int) Result {
    // Load returns a thunk; calling it yields this key's data and error.
    data, err := f.dataloader.Load(ctx, newIntKey(studentID))()
    if err != nil {
        return Result{Err: err}
    }
    // The batch function stores a Result value in Data for every key.
    return Result{Score: data.(Result).Score}
}

// GetScores forwards an explicitly batched request straight to the wrapped
// DBService; the dataloader is not involved. Despite its singular name,
// studentID holds the whole batch of IDs.
func (f FakeDBServiceWithBatching) GetScores(ctx context.Context, studentID []int) []Result {
    return f.dbService.GetScores(ctx, studentID)
}

// Unnecessary boilerplate that will go away with Go Generics eventually

// getStudentIDs unwraps dataloader keys back into plain student IDs,
// preserving their order. Each key's Raw value must be an int; any other
// type makes the type assertion panic.
func getStudentIDs(keys dataloader.Keys) []int {
    ids := make([]int, len(keys))
    for i := range keys {
        ids[i] = keys[i].Raw().(int)
    }
    return ids
}

// IntKey wraps an int so that it can be handed to the dataloader as a key.
type IntKey struct {
    Key int
}

// newIntKey builds the IntKey for the given integer.
func newIntKey(key int) IntKey {
    return IntKey{Key: key}
}

// String renders the wrapped integer in decimal.
func (k IntKey) String() string {
    return fmt.Sprint(k.Key)
}

// Raw exposes the wrapped integer itself.
func (k IntKey) Raw() interface{} {
    return k.Key
}

Now, replace FakeDBService with FakeDBServiceWithBatching

// In handler, in place of `fakeDBService := &FakeDBService{}`
fakeDBService := newDBServiceWithBatching(FakeDBService{})

That’s it, now, your service is ready for implicit batch requests!

You can test it by issuing extra lookups from goroutines, for example,

Replace

result := fakeDBService.GetScore(request.Context(), studentID)

with

// Fire four extra lookups concurrently. Their results are discarded; they
// are only here to give the loader something to batch.
go fakeDBService.GetScore(request.Context(), 1)
go fakeDBService.GetScore(request.Context(), 5)
go fakeDBService.GetScore(request.Context(), 10)
go fakeDBService.GetScore(request.Context(), 21)
// The request's own lookup, whose result is written to the response.
result := fakeDBService.GetScore(request.Context(), studentID)

For a request with student_id=999, this will print a log line like the following (the order of the IDs may vary)

GetScores called with student IDs: [999 1 21 10 5]

Now, of course, you might notice that this batches on a per-request basis, and that’s because of how we set it up: the loader is created inside the handler and is given the request’s context. We can always create the loader once, share it across requests, and use context.Background(), and then this would batch multiple requests into one. And that’s usually best for an API service.

Exit mobile version