sql-formatter-org · nene · Oct 11, 2022 · Oct 5, 2022 · Oct 5, 2022 · Oct 5, 2022
diff --git a/docs/language.md b/docs/language.md
@@ -20,34 +20,35 @@ Specifies the SQL dialect to use.
 - `"sqlite"` - [SQLite][sqlite]
 - `"transactsql"` or `"tsql"` - [SQL Server Transact-SQL][tsql]
 - `"trino"` - [Trino][] / [Presto][]
-- custom formatter class (see below)
+- custom SQL dialect configuration object (see below)
 
 The default `"sql"` dialect is meant for cases where you don't know which dialect of SQL you're about to format.
 It's not an auto-detection, it just supports a subset of features common enough in many SQL implementations.
 This might or might not work for your specific dialect.
 Better to always pick something more specific if possible.
 
-### Custom formatter class (experimental)
+### Custom dialect configuration (experimental)
 
-The language parameter can also be used to specify a custom formatter implementation:
+The language parameter can also be used to specify a custom SQL dialect configuration:
 
 ```ts
-import { format, Formatter, Tokenizer } from 'sql-formatter';
+import { format, DialectOptions } from 'sql-formatter';
 
-class MyFormatter extends Formatter {
-  tokenizer() {
-    return new Tokenizer({
-      // See source code for examples of tokenizer config options
-      // For example: src/languages/sqlite/sqlite.formatter.ts
-    });
-  }
-}
+const myDialect: DialectOptions = {
+  tokenizerOptions: {
+    // See source code for examples of tokenizer config options
+    // For example: src/languages/sqlite/sqlite.formatter.ts
+  },
+  formatOptions: {
+    // ...
+  },
+};
 
-const result = format('SELECT * FROM tbl', { language: MyFormatter });
+const result = format('SELECT * FROM tbl', { language: myDialect });
 ```
 
 **NB!** This functionality is experimental and there are no stability guarantees for this API.
-The API of Formatter and Tokenizer classes can (and likely will) change in non-major releases.
+The DialectOptions interface can (and likely will) change in non-major releases.
 You likely only want to use this if your other alternative is to fork SQL Formatter.
 
 [standard sql]: https://en.wikipedia.org/wiki/SQL:2011

diff --git a/src/FormatOptions.ts b/src/FormatOptions.ts
@@ -1,7 +1,5 @@
 // import only type to avoid ESLint no-cycle rule producing an error
-import type { SqlLanguage } from './sqlFormatter.js';
 import { ParamItems } from './formatter/Params.js';
-import Formatter from './formatter/Formatter.js';
 import { ParamTypes } from './lexer/TokenizerOptions.js';
 
 export type IndentStyle = 'standard' | 'tabularLeft' | 'tabularRight';
@@ -13,7 +11,6 @@ export type CommaPosition = 'before' | 'after' | 'tabular';
 export type LogicalOperatorNewline = 'before' | 'after';
 
 export interface FormatOptions {
-  language: SqlLanguage | typeof Formatter;
   tabWidth: number;
   useTabs: boolean;
   keywordCase: KeywordCase;

diff --git a/src/dialect.ts b/src/dialect.ts
@@ -0,0 +1,44 @@
+import {
+  DialectFormatOptions,
+  ProcessedDialectFormatOptions,
+} from './formatter/ExpressionFormatter.js';
+import Tokenizer from './lexer/Tokenizer.js';
+import { TokenizerOptions } from './lexer/TokenizerOptions.js';
+
+/**
+ * Plain configuration describing an SQL dialect.
+ * createDialect() turns it into a ready-to-use Dialect.
+ */
+export interface DialectOptions {
+  /** Options handed to the Tokenizer constructor when the dialect is built. */
+  tokenizerOptions: TokenizerOptions;
+  /** Dialect-specific formatting options, processed when the dialect is built. */
+  formatOptions: DialectFormatOptions;
+}
+
+/**
+ * A dialect built from DialectOptions: the constructed tokenizer
+ * together with the processed formatting options.
+ */
+export interface Dialect {
+  /** Tokenizer instance created from DialectOptions.tokenizerOptions. */
+  tokenizer: Tokenizer;
+  /** Result of processing DialectOptions.formatOptions. */
+  formatOptions: ProcessedDialectFormatOptions;
+}
+
+// Keyed weakly by the options object, so a dialect built for a custom options
+// object can be garbage-collected once the caller drops that object.
+const cache = new WeakMap<DialectOptions, Dialect>();
+
+/**
+ * Factory function for building Dialect objects.
+ * When called repeatedly with the same options object (compared by identity),
+ * returns the cached Dialect to avoid the cost of creating it again.
+ *
+ * @param options - dialect configuration; also serves as the cache key
+ * @returns the Dialect built from (or previously cached for) these options
+ */
+export const createDialect = (options: DialectOptions): Dialect => {
+  let dialect = cache.get(options);
+  if (!dialect) {
+    dialect = dialectFromOptions(options);
+    cache.set(options, dialect);
+  }
+  return dialect;
+};
+
+/**
+ * Builds a fresh Dialect (no caching): constructs the tokenizer and
+ * processes the dialect-level format options.
+ */
+const dialectFromOptions = (dialectOptions: DialectOptions): Dialect => {
+  const tokenizer = new Tokenizer(dialectOptions.tokenizerOptions);
+  const formatOptions = processDialectFormatOptions(dialectOptions.formatOptions);
+  return { tokenizer, formatOptions };
+};
+
+/**
+ * Normalizes dialect format options: a missing alwaysDenseOperators becomes
+ * an empty list, and the onelineClauses list becomes a name -> true lookup.
+ */
+const processDialectFormatOptions = (
+  options: DialectFormatOptions
+): ProcessedDialectFormatOptions => {
+  const alwaysDenseOperators = options.alwaysDenseOperators || [];
+  const onelineClauses = Object.fromEntries(options.onelineClauses.map(name => [name, true]));
+  return { alwaysDenseOperators, onelineClauses };
+};
diff --git a/src/formatter/Formatter.ts b/src/formatter/Formatter.ts
@@ -1,62 +1,29 @@
 import { FormatOptions } from '../FormatOptions.js';
 import { indentString } from './config.js';
 import Params from './Params.js';
-import Tokenizer from '../lexer/Tokenizer.js';
 
 import { createParser } from '../parser/createParser.js';
 import { StatementNode } from '../parser/ast.js';
-import { cacheInClassField } from '../utils.js';
+import { Dialect } from '../dialect.js';
 
 import formatCommaPositions from './formatCommaPositions.js';
 import formatAliasPositions from './formatAliasPositions.js';
-import ExpressionFormatter, {
-  DialectFormatOptions,
-  ProcessedDialectFormatOptions,
-} from './ExpressionFormatter.js';
+import ExpressionFormatter from './ExpressionFormatter.js';
 import Layout, { WS } from './Layout.js';
 import Indentation from './Indentation.js';
 
 /** Main formatter class that produces a final output string from list of tokens */
 export default class Formatter {
+  private dialect: Dialect;
   private cfg: FormatOptions;
   private params: Params;
 
-  constructor(cfg: FormatOptions) {
+  constructor(dialect: Dialect, cfg: FormatOptions) {
+    this.dialect = dialect;
     this.cfg = cfg;
     this.params = new Params(this.cfg.params);
   }
 
-  /**
-   * SQL Tokenizer for this formatter, provided by subclasses.
-   */
-  protected tokenizer(): Tokenizer {
-    throw new Error('tokenizer() not implemented by subclass');
-  }
-
-  // Cache the tokenizer for each class (each SQL dialect)
-  // So we wouldn't need to recreate the tokenizer, which is kinda expensive,
-  // for each call to format() function.
-  private cachedTokenizer(): Tokenizer {
-    return cacheInClassField(this.constructor, 'cachedTokenizer', () => this.tokenizer());
-  }
-
-  /**
-   * Dialect-specific formatting configuration, provided by subclass.
-   */
-  protected formatOptions(): DialectFormatOptions {
-    throw new Error('formatOptions() not implemented by sybclass');
-  }
-
-  private cachedFormatOptions(): ProcessedDialectFormatOptions {
-    return cacheInClassField(this.constructor, 'cachedFormatOptions', () => {
-      const opts = this.formatOptions();
-      return {
-        alwaysDenseOperators: opts.alwaysDenseOperators || [],
-        onelineClauses: Object.fromEntries(opts.onelineClauses.map(name => [name, true])),
-      };
-    });
-  }
-
   /**
    * Formats an SQL query.
    * @param {string} query - The SQL query string to be formatted
@@ -71,7 +38,7 @@ export default class Formatter {
   }
 
   private parse(query: string): StatementNode[] {
-    return createParser(this.cachedTokenizer()).parse(query, this.cfg.paramTypes || {});
+    return createParser(this.dialect.tokenizer).parse(query, this.cfg.paramTypes || {});
   }
 
   private formatAst(statements: StatementNode[]): string {
@@ -83,7 +50,7 @@ export default class Formatter {
   private formatStatement(statement: StatementNode): string {
     const layout = new ExpressionFormatter({
       cfg: this.cfg,
-      dialectCfg: this.cachedFormatOptions(),
+      dialectCfg: this.dialect.formatOptions,
       params: this.params,
       layout: new Layout(new Indentation(indentString(this.cfg))),
     }).format(statement.children);

diff --git a/src/index.ts b/src/index.ts
@@ -6,6 +6,5 @@ export type {
   LogicalOperatorNewline,
   FormatOptions,
 } from './FormatOptions.js';
-export { default as Formatter } from './formatter/Formatter.js';
-export { default as Tokenizer } from './lexer/Tokenizer.js';
+export type { DialectOptions } from './dialect.js';
 export { expandPhrases } from './expandPhrases.js';
diff --git a/src/languages/bigquery/bigquery.formatter.ts b/src/languages/bigquery/bigquery.formatter.ts
@@ -1,6 +1,4 @@
-import Formatter from '../../formatter/Formatter.js';
-import { DialectFormatOptions } from '../../formatter/ExpressionFormatter.js';
-import Tokenizer from '../../lexer/Tokenizer.js';
+import { DialectOptions } from '../../dialect.js';
 import { EOF_TOKEN, isToken, TokenType, Token } from '../../lexer/token.js';
 import { expandPhrases } from '../../expandPhrases.js';
 import { keywords } from './bigquery.keywords.js';
@@ -155,42 +153,37 @@ const reservedPhrases = expandPhrases([
 ]);
 
 // https://cloud.google.com/bigquery/docs/reference/#standard-sql-reference
-export default class BigQueryFormatter extends Formatter {
-  tokenizer() {
-    return new Tokenizer({
-      reservedSelect,
-      reservedClauses: [...reservedClauses, ...onelineClauses],
-      reservedSetOperations,
-      reservedJoins,
-      reservedPhrases,
-      reservedKeywords: keywords,
-      reservedFunctionNames: functions,
-      extraParens: ['[]'],
-      stringTypes: [
-        // The triple-quoted strings are listed first, so they get matched first.
-        // Otherwise the first two quotes of """ will get matched as an empty "" string.
-        { quote: '""".."""', prefixes: ['R', 'B', 'RB', 'BR'] },
-        { quote: "'''..'''", prefixes: ['R', 'B', 'RB', 'BR'] },
-        '""-bs',
-        "''-bs",
-        { quote: '""-raw', prefixes: ['R', 'B', 'RB', 'BR'], requirePrefix: true },
-        { quote: "''-raw", prefixes: ['R', 'B', 'RB', 'BR'], requirePrefix: true },
-      ],
-      identTypes: ['``'],
-      identChars: { dashes: true },
-      paramTypes: { positional: true, named: ['@'], quoted: ['@'] },
-      lineCommentTypes: ['--', '#'],
-      operators: ['&', '|', '^', '~', '>>', '<<', '||'],
-      postProcess,
-    });
-  }
-
-  formatOptions(): DialectFormatOptions {
-    return {
-      onelineClauses,
-    };
-  }
-}
+/** BigQuery dialect definition: tokenizer configuration plus dialect format options. */
+export const bigquery: DialectOptions = {
+  tokenizerOptions: {
+    reservedSelect,
+    // oneline clauses are registered with the tokenizer as reserved clauses too
+    reservedClauses: [...reservedClauses, ...onelineClauses],
+    reservedSetOperations,
+    reservedJoins,
+    reservedPhrases,
+    reservedKeywords: keywords,
+    reservedFunctionNames: functions,
+    extraParens: ['[]'],
+    stringTypes: [
+      // The triple-quoted strings are listed first, so they get matched first.
+      // Otherwise the first two quotes of """ will get matched as an empty "" string.
+      { quote: '""".."""', prefixes: ['R', 'B', 'RB', 'BR'] },
+      { quote: "'''..'''", prefixes: ['R', 'B', 'RB', 'BR'] },
+      '""-bs',
+      "''-bs",
+      { quote: '""-raw', prefixes: ['R', 'B', 'RB', 'BR'], requirePrefix: true },
+      { quote: "''-raw", prefixes: ['R', 'B', 'RB', 'BR'], requirePrefix: true },
+    ],
+    identTypes: ['``'],
+    identChars: { dashes: true },
+    paramTypes: { positional: true, named: ['@'], quoted: ['@'] },
+    lineCommentTypes: ['--', '#'],
+    operators: ['&', '|', '^', '~', '>>', '<<', '||'],
+    postProcess,
+  },
+  formatOptions: {
+    // same list as the one appended to reservedClauses above
+    onelineClauses,
+  },
+};
 
 function postProcess(tokens: Token[]): Token[] {
   return detectArraySubscripts(combineParameterizedTypes(tokens));

diff --git a/src/languages/db2/db2.formatter.ts b/src/languages/db2/db2.formatter.ts
@@ -1,7 +1,5 @@
+import { DialectOptions } from '../../dialect.js';
 import { expandPhrases } from '../../expandPhrases.js';
-import Formatter from '../../formatter/Formatter.js';
-import { DialectFormatOptions } from '../../formatter/ExpressionFormatter.js';
-import Tokenizer from '../../lexer/Tokenizer.js';
 import { functions } from './db2.functions.js';
 import { keywords } from './db2.keywords.js';
 
@@ -178,30 +176,25 @@ const reservedPhrases = expandPhrases([
 ]);
 
 // https://www.ibm.com/support/knowledgecenter/en/ssw_ibm_i_72/db2/rbafzintro.htm
-export default class Db2Formatter extends Formatter {
-  tokenizer() {
-    return new Tokenizer({
-      reservedSelect,
-      reservedClauses: [...reservedClauses, ...onelineClauses],
-      reservedSetOperations,
-      reservedJoins,
-      reservedPhrases,
-      reservedKeywords: keywords,
-      reservedFunctionNames: functions,
-      stringTypes: [
-        { quote: "''-qq", prefixes: ['G', 'N', 'U&'] },
-        { quote: "''-raw", prefixes: ['X', 'BX', 'GX', 'UX'], requirePrefix: true },
-      ],
-      identTypes: [`""-qq`],
-      paramTypes: { positional: true, named: [':'] },
-      paramChars: { first: '@#$', rest: '@#$' },
-      operators: ['**', '¬=', '¬>', '¬<', '!>', '!<', '||'],
-    });
-  }
-
-  formatOptions(): DialectFormatOptions {
-    return {
-      onelineClauses,
-    };
-  }
-}
+/** DB2 dialect definition: tokenizer configuration plus dialect format options. */
+export const db2: DialectOptions = {
+  tokenizerOptions: {
+    reservedSelect,
+    // oneline clauses are registered with the tokenizer as reserved clauses too
+    reservedClauses: [...reservedClauses, ...onelineClauses],
+    reservedSetOperations,
+    reservedJoins,
+    reservedPhrases,
+    reservedKeywords: keywords,
+    reservedFunctionNames: functions,
+    stringTypes: [
+      { quote: "''-qq", prefixes: ['G', 'N', 'U&'] },
+      { quote: "''-raw", prefixes: ['X', 'BX', 'GX', 'UX'], requirePrefix: true },
+    ],
+    identTypes: [`""-qq`],
+    paramTypes: { positional: true, named: [':'] },
+    paramChars: { first: '@#$', rest: '@#$' },
+    operators: ['**', '¬=', '¬>', '¬<', '!>', '!<', '||'],
+  },
+  formatOptions: {
+    // same list as the one appended to reservedClauses above
+    onelineClauses,
+  },
+};