Skip to content

Replace extending of Formatter with DialectOptions object #493

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Oct 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 15 additions & 14 deletions docs/language.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,34 +20,35 @@ Specifies the SQL dialect to use.
- `"sqlite"` - [SQLite][sqlite]
- `"transactsql"` or `"tsql"` - [SQL Server Transact-SQL][tsql]
- `"trino"` - [Trino][] / [Presto][]
- custom formatter class (see below)
- custom SQL dialect configuration object (see below)

The default `"sql"` dialect is meant for cases where you don't know which dialect of SQL you're about to format.
It's not an auto-detection, it just supports a subset of features common enough in many SQL implementations.
This might or might not work for your specific dialect.
Better to always pick something more specific if possible.

### Custom formatter class (experimental)
### Custom dialect configuration (experimental)

The language parameter can also be used to specify a custom formatter implementation:
The language parameter can also be used to specify a custom SQL dialect configuration:

```ts
import { format, Formatter, Tokenizer } from 'sql-formatter';
import { format, DialectOptions } from 'sql-formatter';

class MyFormatter extends Formatter {
tokenizer() {
return new Tokenizer({
// See source code for examples of tokenizer config options
// For example: src/languages/sqlite/sqlite.formatter.ts
});
}
}
const myDialect: DialectOptions = {
tokenizerOptions: {
// See source code for examples of tokenizer config options
// For example: src/languages/sqlite/sqlite.formatter.ts
},
formatOptions: {
// ...
},
};

const result = format('SELECT * FROM tbl', { language: MyFormatter });
const result = format('SELECT * FROM tbl', { language: myDialect });
```

**NB!** This functionality is experimental and there are no stability guarantees for this API.
The API of Formatter and Tokenizer classes can (and likely will) change in non-major releases.
The DialectOptions interface can (and likely will) change in non-major releases.
You likely only want to use this if your other alternative is to fork SQL Formatter.

[standard sql]: https://en.wikipedia.org/wiki/SQL:2011
Expand Down
3 changes: 0 additions & 3 deletions src/FormatOptions.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
// import only type to avoid ESLint no-cycle rule producing an error
import type { SqlLanguage } from './sqlFormatter.js';
import { ParamItems } from './formatter/Params.js';
import Formatter from './formatter/Formatter.js';
import { ParamTypes } from './lexer/TokenizerOptions.js';

export type IndentStyle = 'standard' | 'tabularLeft' | 'tabularRight';
Expand All @@ -13,7 +11,6 @@ export type CommaPosition = 'before' | 'after' | 'tabular';
export type LogicalOperatorNewline = 'before' | 'after';

export interface FormatOptions {
language: SqlLanguage | typeof Formatter;
tabWidth: number;
useTabs: boolean;
keywordCase: KeywordCase;
Expand Down
44 changes: 44 additions & 0 deletions src/dialect.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import {
DialectFormatOptions,
ProcessedDialectFormatOptions,
} from './formatter/ExpressionFormatter.js';
import Tokenizer from './lexer/Tokenizer.js';
import { TokenizerOptions } from './lexer/TokenizerOptions.js';

/**
 * Raw configuration describing an SQL dialect.
 * This is the input of createDialect(), which builds a Dialect out of it.
 */
export interface DialectOptions {
// Options handed to the Tokenizer constructor when the dialect is built.
tokenizerOptions: TokenizerOptions;
// Dialect-specific formatting options; converted to their processed form when the dialect is built.
formatOptions: DialectFormatOptions;
}

/**
 * A ready-to-use dialect built from DialectOptions:
 * an already constructed Tokenizer plus the processed formatting options.
 */
export interface Dialect {
// Tokenizer instance created from DialectOptions.tokenizerOptions.
tokenizer: Tokenizer;
// Processed form of DialectOptions.formatOptions.
formatOptions: ProcessedDialectFormatOptions;
}

// Keyed weakly by the options object, so that a cached Dialect does not keep
// an otherwise unreferenced options object (and the Tokenizer built from it)
// alive for the lifetime of the process.
const cache = new WeakMap<DialectOptions, Dialect>();

/**
 * Factory function for building Dialect objects.
 * When called repeatedly with same options object returns the cached Dialect,
 * to avoid the cost of creating it again.
 *
 * The cache is keyed by object identity: two structurally equal but distinct
 * options objects each get their own Dialect.
 *
 * @param options - dialect configuration to build the Dialect from
 * @returns the Dialect built from (or previously cached for) these options
 */
export const createDialect = (options: DialectOptions): Dialect => {
  let dialect = cache.get(options);
  if (!dialect) {
    // First time we see this options object: build the dialect and remember it.
    dialect = dialectFromOptions(options);
    cache.set(options, dialect);
  }
  return dialect;
};

/**
 * Builds a fresh Dialect from the given options, without any caching:
 * constructs the Tokenizer first, then processes the formatting options.
 */
const dialectFromOptions = ({ tokenizerOptions, formatOptions }: DialectOptions): Dialect => {
  const tokenizer = new Tokenizer(tokenizerOptions);
  return {
    tokenizer,
    formatOptions: processDialectFormatOptions(formatOptions),
  };
};

/**
 * Converts dialect formatting options into the processed form.
 *
 * - `alwaysDenseOperators` defaults to an empty list when not provided.
 * - `onelineClauses` is converted from a list of names into a lookup object
 *   that maps each name to `true`. A missing list (possible when called from
 *   untyped JavaScript) is treated as empty instead of throwing.
 *
 * @param options - dialect formatting options as written in a dialect definition
 * @returns the processed options object
 */
const processDialectFormatOptions = (
  options: DialectFormatOptions
): ProcessedDialectFormatOptions => ({
  // `??` only substitutes the default for null/undefined.
  alwaysDenseOperators: options.alwaysDenseOperators ?? [],
  onelineClauses: Object.fromEntries((options.onelineClauses ?? []).map(name => [name, true])),
});
47 changes: 7 additions & 40 deletions src/formatter/Formatter.ts
Original file line number Diff line number Diff line change
@@ -1,62 +1,29 @@
import { FormatOptions } from '../FormatOptions.js';
import { indentString } from './config.js';
import Params from './Params.js';
import Tokenizer from '../lexer/Tokenizer.js';

import { createParser } from '../parser/createParser.js';
import { StatementNode } from '../parser/ast.js';
import { cacheInClassField } from '../utils.js';
import { Dialect } from '../dialect.js';

import formatCommaPositions from './formatCommaPositions.js';
import formatAliasPositions from './formatAliasPositions.js';
import ExpressionFormatter, {
DialectFormatOptions,
ProcessedDialectFormatOptions,
} from './ExpressionFormatter.js';
import ExpressionFormatter from './ExpressionFormatter.js';
import Layout, { WS } from './Layout.js';
import Indentation from './Indentation.js';

/** Main formatter class that produces a final output string from list of tokens */
export default class Formatter {
private dialect: Dialect;
private cfg: FormatOptions;
private params: Params;

constructor(cfg: FormatOptions) {
constructor(dialect: Dialect, cfg: FormatOptions) {
this.dialect = dialect;
this.cfg = cfg;
this.params = new Params(this.cfg.params);
}

/**
* SQL Tokenizer for this formatter, provided by subclasses.
*/
protected tokenizer(): Tokenizer {
throw new Error('tokenizer() not implemented by subclass');
}

// Cache the tokenizer for each class (each SQL dialect)
// So we wouldn't need to recreate the tokenizer, which is kinda expensive,
// for each call to format() function.
private cachedTokenizer(): Tokenizer {
return cacheInClassField(this.constructor, 'cachedTokenizer', () => this.tokenizer());
}

/**
* Dialect-specific formatting configuration, provided by subclass.
*/
protected formatOptions(): DialectFormatOptions {
throw new Error('formatOptions() not implemented by sybclass');
}

private cachedFormatOptions(): ProcessedDialectFormatOptions {
return cacheInClassField(this.constructor, 'cachedFormatOptions', () => {
const opts = this.formatOptions();
return {
alwaysDenseOperators: opts.alwaysDenseOperators || [],
onelineClauses: Object.fromEntries(opts.onelineClauses.map(name => [name, true])),
};
});
}

/**
* Formats an SQL query.
* @param {string} query - The SQL query string to be formatted
Expand All @@ -71,7 +38,7 @@ export default class Formatter {
}

private parse(query: string): StatementNode[] {
return createParser(this.cachedTokenizer()).parse(query, this.cfg.paramTypes || {});
return createParser(this.dialect.tokenizer).parse(query, this.cfg.paramTypes || {});
}

private formatAst(statements: StatementNode[]): string {
Expand All @@ -83,7 +50,7 @@ export default class Formatter {
private formatStatement(statement: StatementNode): string {
const layout = new ExpressionFormatter({
cfg: this.cfg,
dialectCfg: this.cachedFormatOptions(),
dialectCfg: this.dialect.formatOptions,
params: this.params,
layout: new Layout(new Indentation(indentString(this.cfg))),
}).format(statement.children);
Expand Down
3 changes: 1 addition & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,5 @@ export type {
LogicalOperatorNewline,
FormatOptions,
} from './FormatOptions.js';
export { default as Formatter } from './formatter/Formatter.js';
export { default as Tokenizer } from './lexer/Tokenizer.js';
export type { DialectOptions } from './dialect.js';
export { expandPhrases } from './expandPhrases.js';
71 changes: 32 additions & 39 deletions src/languages/bigquery/bigquery.formatter.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import Formatter from '../../formatter/Formatter.js';
import { DialectFormatOptions } from '../../formatter/ExpressionFormatter.js';
import Tokenizer from '../../lexer/Tokenizer.js';
import { DialectOptions } from '../../dialect.js';
import { EOF_TOKEN, isToken, TokenType, Token } from '../../lexer/token.js';
import { expandPhrases } from '../../expandPhrases.js';
import { keywords } from './bigquery.keywords.js';
Expand Down Expand Up @@ -155,42 +153,37 @@ const reservedPhrases = expandPhrases([
]);

// https://cloud.google.com/bigquery/docs/reference/#standard-sql-reference
export default class BigQueryFormatter extends Formatter {
tokenizer() {
return new Tokenizer({
reservedSelect,
reservedClauses: [...reservedClauses, ...onelineClauses],
reservedSetOperations,
reservedJoins,
reservedPhrases,
reservedKeywords: keywords,
reservedFunctionNames: functions,
extraParens: ['[]'],
stringTypes: [
// The triple-quoted strings are listed first, so they get matched first.
// Otherwise the first two quotes of """ will get matched as an empty "" string.
{ quote: '""".."""', prefixes: ['R', 'B', 'RB', 'BR'] },
{ quote: "'''..'''", prefixes: ['R', 'B', 'RB', 'BR'] },
'""-bs',
"''-bs",
{ quote: '""-raw', prefixes: ['R', 'B', 'RB', 'BR'], requirePrefix: true },
{ quote: "''-raw", prefixes: ['R', 'B', 'RB', 'BR'], requirePrefix: true },
],
identTypes: ['``'],
identChars: { dashes: true },
paramTypes: { positional: true, named: ['@'], quoted: ['@'] },
lineCommentTypes: ['--', '#'],
operators: ['&', '|', '^', '~', '>>', '<<', '||'],
postProcess,
});
}

formatOptions(): DialectFormatOptions {
return {
onelineClauses,
};
}
}
/**
 * BigQuery SQL dialect definition:
 * tokenizer configuration plus dialect-specific formatting options.
 */
export const bigquery: DialectOptions = {
tokenizerOptions: {
reservedSelect,
// The one-line clauses are registered with the tokenizer together with the other reserved clauses.
reservedClauses: [...reservedClauses, ...onelineClauses],
reservedSetOperations,
reservedJoins,
reservedPhrases,
reservedKeywords: keywords,
reservedFunctionNames: functions,
extraParens: ['[]'],
stringTypes: [
// The triple-quoted strings are listed first, so they get matched first.
// Otherwise the first two quotes of """ will get matched as an empty "" string.
{ quote: '""".."""', prefixes: ['R', 'B', 'RB', 'BR'] },
{ quote: "'''..'''", prefixes: ['R', 'B', 'RB', 'BR'] },
'""-bs',
"''-bs",
{ quote: '""-raw', prefixes: ['R', 'B', 'RB', 'BR'], requirePrefix: true },
{ quote: "''-raw", prefixes: ['R', 'B', 'RB', 'BR'], requirePrefix: true },
],
identTypes: ['``'],
identChars: { dashes: true },
paramTypes: { positional: true, named: ['@'], quoted: ['@'] },
lineCommentTypes: ['--', '#'],
operators: ['&', '|', '^', '~', '>>', '<<', '||'],
// Token post-processing hook: the postProcess() function defined in this file.
postProcess,
},
formatOptions: {
// The same onelineClauses list is also passed on as a formatting option.
onelineClauses,
},
};

function postProcess(tokens: Token[]): Token[] {
return detectArraySubscripts(combineParameterizedTypes(tokens));
Expand Down
53 changes: 23 additions & 30 deletions src/languages/db2/db2.formatter.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import { DialectOptions } from '../../dialect.js';
import { expandPhrases } from '../../expandPhrases.js';
import Formatter from '../../formatter/Formatter.js';
import { DialectFormatOptions } from '../../formatter/ExpressionFormatter.js';
import Tokenizer from '../../lexer/Tokenizer.js';
import { functions } from './db2.functions.js';
import { keywords } from './db2.keywords.js';

Expand Down Expand Up @@ -178,30 +176,25 @@ const reservedPhrases = expandPhrases([
]);

// https://www.ibm.com/support/knowledgecenter/en/ssw_ibm_i_72/db2/rbafzintro.htm
export default class Db2Formatter extends Formatter {
tokenizer() {
return new Tokenizer({
reservedSelect,
reservedClauses: [...reservedClauses, ...onelineClauses],
reservedSetOperations,
reservedJoins,
reservedPhrases,
reservedKeywords: keywords,
reservedFunctionNames: functions,
stringTypes: [
{ quote: "''-qq", prefixes: ['G', 'N', 'U&'] },
{ quote: "''-raw", prefixes: ['X', 'BX', 'GX', 'UX'], requirePrefix: true },
],
identTypes: [`""-qq`],
paramTypes: { positional: true, named: [':'] },
paramChars: { first: '@#$', rest: '@#$' },
operators: ['**', '¬=', '¬>', '¬<', '!>', '!<', '||'],
});
}

formatOptions(): DialectFormatOptions {
return {
onelineClauses,
};
}
}
/**
 * DB2 SQL dialect definition:
 * tokenizer configuration plus dialect-specific formatting options.
 */
export const db2: DialectOptions = {
tokenizerOptions: {
reservedSelect,
// The one-line clauses are registered with the tokenizer together with the other reserved clauses.
reservedClauses: [...reservedClauses, ...onelineClauses],
reservedSetOperations,
reservedJoins,
reservedPhrases,
reservedKeywords: keywords,
reservedFunctionNames: functions,
stringTypes: [
{ quote: "''-qq", prefixes: ['G', 'N', 'U&'] },
{ quote: "''-raw", prefixes: ['X', 'BX', 'GX', 'UX'], requirePrefix: true },
],
identTypes: [`""-qq`],
paramTypes: { positional: true, named: [':'] },
paramChars: { first: '@#$', rest: '@#$' },
operators: ['**', '¬=', '¬>', '¬<', '!>', '!<', '||'],
},
formatOptions: {
// The same onelineClauses list is also passed on as a formatting option.
onelineClauses,
},
};
Loading