Node Reference - List Products

By Paul Rowe, Matt Vincent | July 25, 2018 | Cloud

Teammates sitting together and looking at code

Prerequisites

This article builds on the prior article: Node Reference - Monitoring.

Add listing

Clients of our service will need a way to find products. For that, we need a product listing service. DynamoDB provides an operation for listing the contents of a table called a ‘scan’. (As a table or index grows, the Scan operation slows, but using Scan gets us started before you need to evolve to a Query instead). We will add a new route that listens on the /products url and returns an array of products. Add products/listProducts.spec.js with the following tests:

const proxyquire = require('proxyquire');

describe('products', function () {
    describe('listProducts', function () {
        // Jasmine callbacks are function expressions (not arrows) so that `this`
        // is the per-spec context shared by beforeEach, it and afterEach.
        beforeEach(function () {
            // listProducts reads PRODUCTS_TABLE_NAME when it is loaded and the
            // TableName spec below expects the 'Products' default. Clear any
            // value exported in the developer's shell; afterEach restores it.
            this.savedTableName = process.env.PRODUCTS_TABLE_NAME;
            delete process.env.PRODUCTS_TABLE_NAME;

            // Canned result that the stubbed scan resolves with.
            this.response = {
                Items: [{}, {}]
            };

            // Minimal stand-in for the request context: an empty query string
            // and a response that records every header passed to set().
            this.context = {
                query: {},
                response: {
                    headers: {},
                    set(field, value) {
                        this.headers[field] = value;
                    }
                }
            };

            // promise() looks up this.response lazily, so a spec may still
            // change the canned result before calling the handler.
            this.awsResult = {
                promise: () => Promise.resolve(this.response)
            };
            const documentClient = this.documentClient = {
                scan: () => this.awsResult
            };
            spyOn(this.documentClient, 'scan').and.callThrough();

            // Load the module under test with the AWS SDK replaced by a stub
            // whose DocumentClient constructor hands back our fake client.
            this.listProducts = proxyquire('./listProducts', {
                'aws-sdk': {
                    DynamoDB: {
                        DocumentClient: function() {
                            return documentClient;
                        }
                    }
                }
            });
        });

        afterEach(function () {
            // Put the environment back exactly as we found it.
            if (this.savedTableName === undefined) {
                delete process.env.PRODUCTS_TABLE_NAME;
            } else {
                process.env.PRODUCTS_TABLE_NAME = this.savedTableName;
            }
        });

        it('should pass the correct TableName to documentClient.scan', async function () {
            await this.listProducts(this.context);
            expect(this.documentClient.scan.calls.argsFor(0)[0].TableName).toEqual('Products');
        });

        it('should return the product list', async function() {
            await this.listProducts(this.context);
            expect(this.context.body).toEqual(this.response.Items);
        });

        it('should limit the scan to 25 items', async function() {
            await this.listProducts(this.context);
            expect(this.documentClient.scan.calls.argsFor(0)[0].Limit).toEqual(25);
        });
    });
});

And the implementation in products/listProducts.js

const AWS = require('aws-sdk');
const documentClient = new AWS.DynamoDB.DocumentClient();
const productsTableName = process.env.PRODUCTS_TABLE_NAME || 'Products';

module.exports = async function getProductList(ctx) {
    const scanOutput = await documentClient.scan({
        Segment: ctx.segment,
        TableName: productsTableName,
        Limit: 25
    }).promise();

    ctx.body = scanOutput.Items;
};

Your tests should now be passing again:

# The spec asserts the default table name 'Products', so clear any override first.
unset PRODUCTS_TABLE_NAME
npm test

After adding the route to server.js we should be able to make a GET request to /products and get back an array of products.

function buildRouter() {
    ...
    // Serve GET /products with the listProducts handler.
    router.get('/products', require('./products/listProducts'));
    ...
}

Now, you can start your service locally:

# Print the value of one CloudFormation stack output.
#   $1 - stack name
#   $2 - output key
stack_output() {
    aws cloudformation describe-stacks \
        --stack-name "$1" \
        --query "Stacks[0].Outputs[?OutputKey==\`$2\`].OutputValue" \
        --output text
}

# Resolve the settings the service reads from its environment, then export them.
PRODUCTS_TABLE_NAME=$(stack_output ProductService-DEV ProductsTable)
USER_POOL_ID=$(stack_output Cognito UserPoolId)
AWS_REGION="us-east-1"
export PRODUCTS_TABLE_NAME USER_POOL_ID AWS_REGION

npm start

Then list your products by calling the products GET endpoint:

# Host prefix of the Cognito auth domain used in the token URL below.
AUTH_NAME="theproducts"

# Id of the user pool, read from the outputs of the Cognito stack.
USER_POOL_ID=$(aws cloudformation describe-stacks \
    --stack-name Cognito \
    --query 'Stacks[0].Outputs[?OutputKey==`UserPoolId`].OutputValue' \
    --output text)

# Id of the first app client registered in that pool.
USER_POOL_CLIENT_ID=$(aws cognito-idp list-user-pool-clients \
    --user-pool-id "$USER_POOL_ID" \
    --max-results 1 \
    --query 'UserPoolClients[0].ClientId' --output text)

# Secret belonging to that app client.
CLIENT_SECRET=$(aws cognito-idp describe-user-pool-client --user-pool-id "$USER_POOL_ID" --client-id "$USER_POOL_CLIENT_ID" --query 'UserPoolClient.ClientSecret' --output text)

# Exchange the client credentials for an access token. The credentials are
# passed with --user (HTTP Basic auth) instead of being embedded in an unquoted
# URL, and print() is called as a function so the one-liner runs under both
# Python 2 and Python 3.
BEARER_TOKEN=$(curl -s -X POST \
  --user "${USER_POOL_CLIENT_ID}:${CLIENT_SECRET}" \
  "https://${AUTH_NAME}.auth.us-east-1.amazoncognito.com/oauth2/token" \
  -H 'Content-Type: application/x-www-form-urlencoded' \
  -d grant_type=client_credentials | \
  python -c "import sys, json; print(json.load(sys.stdin)['access_token'])")

# Call the locally running service with the token.
curl --request GET \
  --verbose http://localhost:3000/products \
  -H "Authorization: Bearer $BEARER_TOKEN"

Pagination

You will notice that we are limiting the list of products to only 25 items and may wonder why we are not returning the entire list. We might be able to return all products if we were reasonably sure the number of items would stay low, but we can’t make the same assumption about other entities (e.g. customer or order) that could have hundreds of thousands or even millions of records.

A best practice of RESTful microservices is to take a “self preservation” approach. Start by asking if there is a situation that would cause a RESTful service client’s request to do harm to the service. Requesting a million records from the database could cause the server to run out of memory and crash or take several seconds to complete and cause other requests to wait. Therefore, limiting the size of the response to a fixed number prevents these issues from occurring.

There is also a monitoring benefit to limiting the scope of requests. Microservices that are consistent in their performance are easier to monitor. If a route typically takes 150ms to serve a request and has been doing so for weeks until a few requests start to take 20 seconds, we can deduce that something is going on with that service because the requests should never take more than 150ms or so. Conversely, if the server’s performance is dependent on the type or scope of request the client is sending, then it is difficult to know if that 20 second request is an anomaly or just the result of a client that decided to fetch 3 million records from the system in one shot.

In order to limit the size of data set that a client can request, we can implement paging. With pagination, the DynamoDB scan results are divided into “pages” of data that are 1 MB in size or less (The result set from a Scan is limited to 1 MB per call). A client requests the first page of results, then the second page, etc.

To implement paging, we need to transmit at least two important pieces of data:

1. The server needs to tell the client that the response does not include all the data and that additional pages are available.
2. The client needs to tell the server in the request that a different page of results is needed than the server would have sent by default.

The first solution that probably comes to mind is to add a field to the response that says that there are more pages and a query parameter to the request to tell the server what page to return. The response would therefore look something like this:

{
    "items": [
        {
            "id": "xxx"
            ...
        },
        {
            "id": "yyy"
            ...
        }
    ],
    "currentPage": 3,
    "numberOfPages": 4
}

The request for the next page could look something like this:

https://example.com/products?page=4

This approach works, but it has some drawbacks: First, the response has to be changed to include an extra layer. This prevents clients from simply parsing the response into an array but instead needs to parse the response into some sort of wrapper object and then index into the array. This is somewhat inconvenient.

Second, the client must be aware of the name of the array field (called “items” above), as well as the page fields and the page parameter. All of these can be documented, but that requires additional documentation by the server, and because an API design decision is being made (e.g. what to call these fields), other services built by other teams might pick different names, leading to clients having to keep track of these inconsistencies across services.

All of these drawbacks are simple but they do increase the burden on clients as well as the technical risk around integration between services.

What if we could leverage a common HTTP standard that we could refer people to so that we did not have to make as many API design decisions and document those decisions? The Content-Range and Range headers are just such a standard.

The way it works is the server responds with a “Content-Range” HTTP header that specifies the way the response is being broken up (for binary data, bytes makes sense, but any token works). Because “Content-Range” is a header, we don’t have to change our response payload to add an extra object with extra fields. The same response as above would have a header that looks like this:

Content-Range: items 50-74/100

This tells the client that the body contains range 50 through 74 “items” of a total of 100 items. The client would then send a “Range” header in the request for the next page that looks like this:

Range: items=75-99

The server now knows what page to return. This is a significant improvement over the previous solution. It simplifies the response body and provides a contract leveraging HTTP standards, that both servers and clients can develop to.

It is, however, not without its own drawbacks. Both the client and server have to do quite a bit of logic to make sure this strategy works:

a. The server has to know the total number of items, not just the fact there are more items than requested.
b. The server has to calculate the correct offsets based on the page size to return to the client
c. The client has to parse the header into various numbers.
d. The client must interpret the start and end and calculate a new start and end based on page size.
e. When the server receives a Range request, it has to parse the Range header and verify that the client is requesting a valid range (i.e the client isn't asking for a 3 million item page). If the request is invalid, then the server has to return the appropriate error response.

In short, there are a lot of moving pieces that have to be considered. Can we simplify our solution further if we assume that the client will almost always want to simply scroll through the pages from first to last (or maybe back up)? If you think about it, jumping directly to a middle page doesn’t make a lot of sense in most cases. If we are willing to limit pagination to sequential forward/backward, then we can leverage a different standard: Web Linking.

The Web Linking spec also uses HTTP headers, like the Range specification, but does so in a more opaque way. The server sends back a single “Link” header that pairs a URL with a rel (relation type); the URL can be used to request the “next” page relative to the current URL. For example:

Link: </products?page=4>; rel="next"

The client then looks at this header, sees there is a “next” link and can follow it to get the next page. There is a key important point to this setup: The client has no knowledge of what the “page” parameter in the URL means. It doesn’t ever parse the URL, but instead can resolve it to request the next page. Because of this, the server can encode any information it needs to fetch the next page and only it will ever need to make sense of that data. This greatly simplifies the client logic.

DynamoDB will indicate in its response that there are more items to fetch and how to continue scanning from that point via a LastEvaluatedKey property. This feature makes it easy for us to implement Web Links. To start, let’s add a new section of test cases to products/listProducts.spec.js _inside_ of your existing listProducts describe block:

// Specs for the Link-header based paging of listProducts. They rely on the
// this.response / this.context / this.documentClient fixtures created by the
// enclosing listProducts beforeEach.
describe('pagination', function() {
    it('should not return a Link header if the returned LastEvaluatedKey is undefined', async function() {
        delete this.response.LastEvaluatedKey;
        await this.listProducts(this.context);
        // The handler sets the header under the lower-case name 'link' and the
        // stubbed response stores names verbatim, so that is the key to check;
        // asserting on 'Link' would pass even if a header had been written.
        expect(this.context.response.headers.link).toBeUndefined();
    });

    it('should return a properly formatted link header when LastEvaluatedKey is returned', async function() {
        this.response.LastEvaluatedKey = {
            id: 'id123'
        };
        await this.listProducts(this.context);
        expect(this.context.response.headers.link)
            .toEqual('</products?_lek=id123>; rel="next"');
    });

    it('should pass the _lek param to Dyanamo if this is a pagination request', async function() {
        // _lek is the opaque paging token the client echoes back from the link.
        this.context.query = {
            _lek: 'key123'
        };
        await this.listProducts(this.context);
        expect(this.documentClient.scan.calls.argsFor(0)[0].ExclusiveStartKey)
            .toEqual({id: 'key123'});
    });
});

To make our implementation even easier, we can leverage the format-link-header library to build our header. (There is even a companion library for Javascript clients to parse the Link header). Install format-link-header and modify products/listProducts.js to match something similar to this:

npm install format-link-header --save

const AWS = require('aws-sdk');
const documentClient = new AWS.DynamoDB.DocumentClient();
const formatLinkHeader = require('format-link-header');
const productsTableName = process.env.PRODUCTS_TABLE_NAME || 'Products';

/**
 * Derives the scan start key from the `_lek` query parameter.
 *
 * @param {object} ctx - Request context; `ctx.query` may be absent.
 * @returns {{id: *}|undefined} `{ id: <_lek value> }` when a truthy `_lek`
 *     was supplied, otherwise `undefined`.
 */
function getExclusiveStartKey(ctx) {
    const lastEvaluatedId = ctx.query ? ctx.query._lek : undefined;
    if (!lastEvaluatedId) {
        return undefined;
    }
    return { id: lastEvaluatedId };
}

/**
 * Sets a `link` response header pointing at the next page, when there is one.
 *
 * @param {object} ctx - Request context; the header is written through
 *     `ctx.response.set`.
 * @param {{id: *}|undefined} lastEvaluatedKey - The `LastEvaluatedKey` of the
 *     scan result. When it is falsy no header is set.
 * @returns {void}
 */
function addLinkHeaderIfNeeded(ctx, lastEvaluatedKey) {
    if (!lastEvaluatedKey) {
        return;
    }

    // The id goes into a query string, so it must be percent-encoded;
    // otherwise an id containing '&', '#', '+' or whitespace would produce a
    // link that does not round-trip back into the `_lek` parameter.
    const nextPageToken = encodeURIComponent(lastEvaluatedKey.id);
    const link = {
        next: {
            rel: 'next',
            url: `/products?_lek=${nextPageToken}`
        }
    };
    ctx.response.set('link', formatLinkHeader(link));
}

module.exports = async function getProductList(ctx) {
    const scanOutput = await documentClient.scan({
        Segment: ctx.segment,
        TableName: productsTableName,
        Limit: 25,
        ExclusiveStartKey: getExclusiveStartKey(ctx)
    }).promise();

    addLinkHeaderIfNeeded(ctx, scanOutput.LastEvaluatedKey);
    ctx.body = scanOutput.Items;
};

Now clients can page through all products simply. You can see our template changes here.

If you have questions or feedback on this series, contact the authors at nodereference@sourceallies.com.