Skip to content

feat: context cache #70

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ jobs:
matrix:
os: [ubuntu-latest]
node-version:
- 12.x
- 14.x
- 16.x
- 18.x
- 20.x
steps:
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v3
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ yarn-error.log
**/index.d.ts
coverage
documentation
perf/output.txt
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,18 @@ This library exposes many operations that are useful to parse and handle a JSON-
For this, the static functions on [`Util`](https://github.com/rubensworks/jsonld-context-parser.js/blob/master/lib/Util.ts)
and [`ContextParser`](https://github.com/rubensworks/jsonld-context-parser.js/blob/master/lib/ContextParser.ts) can be used.

##### Context Caching

Parsed contexts can be cached, and a single cache can be shared between multiple context parsers, so that a context that was already parsed with the same options does not have to be parsed again. This can be done as follows:

```ts
import { ContextCache, ContextParser } from 'jsonld-context-parser';

const contextCache = new ContextCache();
const contextParser1 = new ContextParser({ contextCache });
const contextParser2 = new ContextParser({ contextCache });
```

## Command-line

A command-line tool is provided to quickly normalize any context by URL, file or string.
Expand Down
2 changes: 2 additions & 0 deletions index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ export * from './lib/IDocumentLoader';
export * from './lib/JsonLdContext';
export * from './lib/JsonLdContextNormalized';
export * from './lib/Util';
export * from './lib/ContextCache';
export * from './lib/IContextCache';
46 changes: 46 additions & 0 deletions lib/ContextCache.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import { JsonLdContextNormalized } from "./JsonLdContextNormalized";
import { JsonLdContext } from "./JsonLdContext";
import md5 = require("md5");
import { IParseOptions } from "./ContextParser";
import { IContextCache } from "./IContextCache";
import { LRUCache } from "lru-cache";

/**
 * Recursively copy a JSON-like value so that object keys are in sorted order
 * and entries whose value is `undefined` are left out.
 * Arrays keep their element order; primitives are returned as-is.
 */
function sortKeysDeep(value: unknown): unknown {
  if (Array.isArray(value)) {
    return value.map(sortKeysDeep);
  }
  if (value !== null && typeof value === "object") {
    const source = value as Record<string, unknown>;
    const sorted: Record<string, unknown> = {};
    for (const key of Object.keys(source).sort()) {
      if (source[key] !== undefined) {
        sorted[key] = sortKeysDeep(source[key]);
      }
    }
    return sorted;
  }
  return value;
}

/**
 * Hash the parse options into an md5 digest.
 *
 * `parentContext` is always excluded here, and entries that are `undefined` are ignored,
 * so `undefined`, `{}` and `{ baseIRI: undefined }` all produce the same digest.
 * Keys are sorted at every nesting level, so the digest does not depend on insertion order.
 *
 * Note: the keys must NOT be passed as an array replacer to `JSON.stringify`;
 * such an array acts as an allow-list for nested objects as well,
 * which would silently drop the contents of nested options such as `remoteContexts`.
 *
 * @param options The parse options to hash, if any.
 * @returns The md5 digest of the canonical JSON serialization of the options.
 */
function hashOptions(options: IParseOptions | undefined): string {
  const canonical = sortKeysDeep({ ...options, parentContext: undefined });
  return md5(JSON.stringify(canonical));
}

/**
 * An LRU-backed cache of (promises to) normalized JSON-LD contexts.
 *
 * Entries are keyed by the string produced by {@link ContextCache.hash},
 * which combines the parse options, the parent context (if non-empty) and the context itself.
 */
export class ContextCache implements IContextCache {
  private readonly cachedParsing: LRUCache<string, Promise<JsonLdContextNormalized>>;

  /**
   * @param options Options passed to the underlying LRU cache.
   *                When omitted, the cache holds at most 512 entries.
   */
  constructor(options?: LRUCache.Options<string, Promise<JsonLdContextNormalized>, unknown>) {
    this.cachedParsing = new LRUCache(options ?? { max: 512 });
  }

  /**
   * Hash a context/options pair into a cache key.
   * @param context The context that is about to be parsed.
   * @param options The parse options, if any.
   * @returns An md5 digest over the options, the parent context and the context.
   */
  public hash(
    context: JsonLdContext,
    options?: IParseOptions
  ): string {
    let hash = hashOptions(options);

    // The parent context changes the parsing result, so it has to be part of the key.
    // An empty parent context is treated the same as no parent context.
    if (options?.parentContext && Object.keys(options.parentContext).length !== 0) {
      hash = md5(hash + md5(JSON.stringify(options.parentContext)));
    }

    return md5(hash + md5(JSON.stringify(context)));
  }

  /**
   * Look up a cached parsing result.
   * @param context A cache key as produced by `hash`.
   * @returns The stored promise, or `undefined` if the underlying cache has no entry for the key.
   */
  public get(context: string): Promise<JsonLdContextNormalized> | undefined {
    return this.cachedParsing.get(context);
  }

  /**
   * Store a parsing result.
   * @param context A cache key as produced by `hash`.
   * @param normalized A promise resolving to the normalized context for that key.
   */
  public set(context: string, normalized: Promise<JsonLdContextNormalized>): void {
    this.cachedParsing.set(context, normalized);
  }
}
195 changes: 112 additions & 83 deletions lib/ContextParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import {IDocumentLoader} from "./IDocumentLoader";
import {IJsonLdContext, IJsonLdContextNormalizedRaw, IPrefixValue, JsonLdContext} from "./JsonLdContext";
import {JsonLdContextNormalized, defaultExpandOptions, IExpandOptions} from "./JsonLdContextNormalized";
import {Util} from "./Util";
import { IContextCache } from './IContextCache';
import { ContextCache } from './ContextCache';

// tslint:disable-next-line:no-var-requires
const canonicalizeJson = require('canonicalize');
Expand All @@ -23,10 +25,12 @@ export class ContextParser {
private readonly expandContentTypeToBase: boolean;
private readonly remoteContextsDepthLimit: number;
private readonly redirectSchemaOrgHttps: boolean;
private readonly contextCache?: IContextCache;

constructor(options?: IContextParserOptions) {
options = options || {};
this.documentLoader = options.documentLoader || new FetchDocumentLoader();
this.contextCache = options.contextCache;
this.documentCache = {};
this.validateContext = !options.skipValidation;
this.expandContentTypeToBase = !!options.expandContentTypeToBase;
Expand Down Expand Up @@ -610,14 +614,14 @@ must be one of ${Util.CONTAINERS.join(', ')}`, ERROR_CODES.INVALID_CONTAINER_MAP
const parentContext = {...context};
parentContext[key] = {...parentContext[key]};
delete parentContext[key]['@context'];
await this.parse(value['@context'],
await this._parse(value['@context'],
{ ...options, external: false, parentContext, ignoreProtection: true, ignoreRemoteScopedContexts: true, ignoreScopedContexts: true });
} catch (e) {
throw new ErrorCoded(e.message, ERROR_CODES.INVALID_SCOPED_CONTEXT);
}
}

value['@context'] = (await this.parse(value['@context'],
value['@context'] = (await this._parse(value['@context'],
{ ...options, external: false, minimalProcessing: true, ignoreRemoteScopedContexts: true, parentContext: context }))
.getContextRaw();
}
Expand All @@ -633,7 +637,105 @@ must be one of ${Util.CONTAINERS.join(', ')}`, ERROR_CODES.INVALID_CONTAINER_MAP
* @return {Promise<JsonLdContextNormalized>} A promise resolving to the context.
*/
public async parse(context: JsonLdContext,
                   options: IParseOptions = {}): Promise<JsonLdContextNormalized> {
  // Without a configured cache, every call is parsed from scratch
  if (!this.contextCache) {
    return this._parse(context, options);
  }

  // Look up a previous result for this context/options pair
  const hash = this.contextCache.hash(context, options);
  const cached = this.contextCache.get(hash);
  if (cached) {
    return cached;
  }

  // The pending promise itself is stored (before it settles),
  // so calls that arrive with the same hash while parsing is in progress reuse it.
  // NOTE(review): a promise that rejects is stored as well; nothing here removes it again.
  const parsed = this._parse(context, options);
  this.contextCache.set(hash, parsed);
  return parsed;
}

/**
* Fetch the given URL as a raw JSON-LD context.
* @param url An URL.
* @return A promise resolving to a raw JSON-LD context.
*/
public async load(url: string): Promise<JsonLdContext> {
  // Serve repeated requests from the document cache; arrays and objects are handed out as shallow copies
  const hit = this.documentCache[url];
  if (hit) {
    if (typeof hit === 'string') {
      return hit;
    }
    return Array.isArray(hit) ? hit.slice() : {... hit};
  }

  // Cache miss: fetch the document through the configured loader
  let fetched: IJsonLdContext;
  try {
    fetched = await this.documentLoader.load(url);
  } catch (e) {
    throw new ErrorCoded(`Failed to load remote context ${url}: ${e.message}`,
      ERROR_CODES.LOADING_REMOTE_CONTEXT_FAILED);
  }

  // A remote context document must carry an @context entry
  if (!('@context' in fetched)) {
    throw new ErrorCoded(`Missing @context in remote context at ${url}`,
      ERROR_CODES.INVALID_REMOTE_CONTEXT);
  }

  // Remember the raw context for later calls, then return it
  const remoteContext = fetched['@context'];
  this.documentCache[url] = remoteContext;
  return remoteContext;
}

/**
* Override the given context that may be loaded.
*
* This will check whether or not the url is recursively being loaded.
* @param url An URL.
* @param options Parsing options.
* @return An overridden context, or null.
* Optionally an error can be thrown if a cyclic context is detected.
*/
public getOverriddenLoad(url: string, options: IParseOptions): IJsonLdContextNormalizedRaw | null {
  // Nothing to override when this URL is not among the remote contexts recorded in the options
  const seenRemoteContexts = options.remoteContexts || {};
  if (!(url in seenRemoteContexts)) {
    return null;
  }

  // The URL was seen before: this is an error unless remote scoped contexts are ignored
  if (!options.ignoreRemoteScopedContexts) {
    throw new ErrorCoded('Detected a cyclic context inclusion of ' + url,
      ERROR_CODES.RECURSIVE_CONTEXT_INCLUSION);
  }

  // When ignoring, the URL itself is returned in place of a loaded context
  return <IJsonLdContextNormalizedRaw> <any> url;
}

/**
* Load an @import'ed context.
* @param importContextIri The full URI of an @import value.
*/
public async loadImportContext(importContextIri: string): Promise<IJsonLdContextNormalizedRaw> {
  // Fetch the raw context behind the @import IRI
  const imported = await this.load(importContextIri);

  // Strings and arrays are rejected: an import must resolve to one context object
  if (typeof imported !== 'object' || Array.isArray(imported)) {
    throw new ErrorCoded(
      'An imported context must be a single object: ' + importContextIri,
      ERROR_CODES.INVALID_REMOTE_CONTEXT,
    );
  }

  // Nested imports are not allowed
  if ('@import' in imported) {
    throw new ErrorCoded(
      'An imported context can not import another context: ' + importContextIri,
      ERROR_CODES.INVALID_CONTEXT_ENTRY,
    );
  }

  // Convert containers into hash values, just like for the importing context;
  // context validation would otherwise fail for container values
  this.containersToHash(imported);
  return imported;
}

/**
* Parse a JSON-LD context in any form.
* @param {JsonLdContext} context A context, URL to a context, or an array of contexts/URLs.
* @param {IParseOptions} options Optional parsing options.
* @return {Promise<JsonLdContextNormalized>} A promise resolving to the context.
*/
private async _parse(context: JsonLdContext,
options: IParseOptions): Promise<JsonLdContextNormalized> {
const {
baseIRI,
parentContext: parentContextInitial,
Expand Down Expand Up @@ -667,7 +769,7 @@ must be one of ${Util.CONTAINERS.join(', ')}`, ERROR_CODES.INVALID_CONTAINER_MAP
if (overriddenLoad) {
return new JsonLdContextNormalized(overriddenLoad);
}
const parsedStringContext = await this.parse(await this.load(contextIri),
const parsedStringContext = await this._parse(await this.load(contextIri),
{
...options,
baseIRI: contextIri,
Expand Down Expand Up @@ -699,7 +801,7 @@ must be one of ${Util.CONTAINERS.join(', ')}`, ERROR_CODES.INVALID_CONTAINER_MAP
}

const reducedContexts = await contexts.reduce((accContextPromise, contextEntry, i) => accContextPromise
.then((accContext) => this.parse(contextEntry, {
.then((accContext) => this._parse(contextEntry, {
...options,
baseIRI: contextIris[i] || options.baseIRI,
external: !!contextIris[i] || options.external,
Expand All @@ -714,7 +816,7 @@ must be one of ${Util.CONTAINERS.join(', ')}`, ERROR_CODES.INVALID_CONTAINER_MAP
return reducedContexts;
} else if (typeof context === 'object') {
if ('@context' in context) {
return await this.parse(context['@context'], options);
return await this._parse(context['@context'], options);
}

// Make a deep clone of the given context, to avoid modifying it.
Expand Down Expand Up @@ -807,90 +909,17 @@ must be one of ${Util.CONTAINERS.join(', ')}`, ERROR_CODES.INVALID_CONTAINER_MAP
}
}

/**
* Fetch the given URL as a raw JSON-LD context.
* @param url An URL.
* @return A promise resolving to a raw JSON-LD context.
*/
public async load(url: string): Promise<JsonLdContext> {
  // Serve repeated requests from the document cache; arrays and objects are handed out as shallow copies
  const hit = this.documentCache[url];
  if (hit) {
    if (typeof hit === 'string') {
      return hit;
    }
    return Array.isArray(hit) ? hit.slice() : {... hit};
  }

  // Cache miss: fetch the document through the configured loader
  let fetched: IJsonLdContext;
  try {
    fetched = await this.documentLoader.load(url);
  } catch (e) {
    throw new ErrorCoded(`Failed to load remote context ${url}: ${e.message}`,
      ERROR_CODES.LOADING_REMOTE_CONTEXT_FAILED);
  }

  // A remote context document must carry an @context entry
  if (!('@context' in fetched)) {
    throw new ErrorCoded(`Missing @context in remote context at ${url}`,
      ERROR_CODES.INVALID_REMOTE_CONTEXT);
  }

  // Remember the raw context for later calls, then return it
  const remoteContext = fetched['@context'];
  this.documentCache[url] = remoteContext;
  return remoteContext;
}

/**
* Override the given context that may be loaded.
*
* This will check whether or not the url is recursively being loaded.
* @param url An URL.
* @param options Parsing options.
* @return An overridden context, or null.
* Optionally an error can be thrown if a cyclic context is detected.
*/
public getOverriddenLoad(url: string, options: IParseOptions): IJsonLdContextNormalizedRaw | null {
  // Nothing to override when this URL is not among the remote contexts recorded in the options
  const seenRemoteContexts = options.remoteContexts || {};
  if (!(url in seenRemoteContexts)) {
    return null;
  }

  // The URL was seen before: this is an error unless remote scoped contexts are ignored
  if (!options.ignoreRemoteScopedContexts) {
    throw new ErrorCoded('Detected a cyclic context inclusion of ' + url,
      ERROR_CODES.RECURSIVE_CONTEXT_INCLUSION);
  }

  // When ignoring, the URL itself is returned in place of a loaded context
  return <IJsonLdContextNormalizedRaw> <any> url;
}

/**
* Load an @import'ed context.
* @param importContextIri The full URI of an @import value.
*/
public async loadImportContext(importContextIri: string): Promise<IJsonLdContextNormalizedRaw> {
  // Fetch the raw context behind the @import IRI
  const imported = await this.load(importContextIri);

  // Strings and arrays are rejected: an import must resolve to one context object
  if (typeof imported !== 'object' || Array.isArray(imported)) {
    throw new ErrorCoded(
      'An imported context must be a single object: ' + importContextIri,
      ERROR_CODES.INVALID_REMOTE_CONTEXT,
    );
  }

  // Nested imports are not allowed
  if ('@import' in imported) {
    throw new ErrorCoded(
      'An imported context can not import another context: ' + importContextIri,
      ERROR_CODES.INVALID_CONTEXT_ENTRY,
    );
  }

  // Convert containers into hash values, just like for the importing context;
  // context validation would otherwise fail for container values
  this.containersToHash(imported);
  return imported;
}

}

export interface IContextParserOptions {
/**
* An optional loader that should be used for fetching external JSON-LD contexts.
*/
documentLoader?: IDocumentLoader;
/**
* An optional cache for parsed contexts.
*/
contextCache?: IContextCache;
/**
* By default, JSON-LD contexts will be validated.
* This can be disabled by setting this option to true.
Expand Down
25 changes: 25 additions & 0 deletions lib/IContextCache.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import { JsonLdContextNormalized } from "./JsonLdContextNormalized";
import { JsonLdContext } from "./JsonLdContext";
import { IParseOptions } from "./ContextParser";

/**
* Caches the normalized version of a JSON-LD context.
*/
export interface IContextCache {
/**
* Returns the cached promise for the normalized version of a JSON-LD context.
* @param {string} context The cache key of a JSON-LD context, as produced by `hash`.
* @return {Promise<JsonLdContextNormalized> | undefined} A promise resolving to a normalized JSON-LD context,
* or undefined when the cache holds no entry for the given key.
*/
get(context: string): Promise<JsonLdContextNormalized> | undefined;
/**
* Stores the promise for the normalized version of a JSON-LD context.
* @param {string} context The cache key of a JSON-LD context, as produced by `hash`.
* @param {Promise<JsonLdContextNormalized>} normalized A promise resolving to the normalized JSON-LD context
* that belongs to the given key.
*/
set(context: string, normalized: Promise<JsonLdContextNormalized>): void;
/**
* Hash a context/options pair into the key to be used for caching the context.
* @param {JsonLdContext} context The JSON-LD context that is to be parsed.
* @param {IParseOptions | undefined} options The options the context is parsed with, if any.
* @return {string} The key under which the parsing result is passed to `get` and `set`.
*/
hash(context: JsonLdContext, options: IParseOptions | undefined): string;
}
Loading