Skip to content

Commit a555575

Browse files
committed
Add TokenType.RESERVED_DATA_TYPE and dataTypeCase
1 parent 10b4451 commit a555575

File tree

10 files changed

+80
-14
lines changed

10 files changed

+80
-14
lines changed

src/FormatOptions.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,16 @@ export type KeywordCase = 'preserve' | 'upper' | 'lower';
88

99
export type IdentifierCase = 'preserve' | 'upper' | 'lower';
1010

11+
export type DataTypeCase = 'preserve' | 'upper' | 'lower';
12+
1113
export type LogicalOperatorNewline = 'before' | 'after';
1214

1315
export interface FormatOptions {
1416
tabWidth: number;
1517
useTabs: boolean;
1618
keywordCase: KeywordCase;
1719
identifierCase: IdentifierCase;
20+
dataTypeCase: DataTypeCase;
1821
indentStyle: IndentStyle;
1922
logicalOperatorNewline: LogicalOperatorNewline;
2023
expressionWidth: number;

src/formatter/ExpressionFormatter.ts

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import {
2828
CaseExpressionNode,
2929
CaseWhenNode,
3030
CaseElseNode,
31+
DataTypeNode,
3132
} from '../parser/ast.js';
3233

3334
import Layout, { WS } from './Layout.js';
@@ -130,6 +131,8 @@ export default class ExpressionFormatter {
130131
return this.formatLineComment(node);
131132
case NodeType.block_comment:
132133
return this.formatBlockComment(node);
134+
case NodeType.data_type:
135+
return this.formatDataType(node);
133136
case NodeType.keyword:
134137
return this.formatKeywordNode(node);
135138
}
@@ -145,7 +148,9 @@ export default class ExpressionFormatter {
145148
private formatArraySubscript(node: ArraySubscriptNode) {
146149
this.withComments(node.array, () => {
147150
this.layout.add(
148-
node.array.type === NodeType.keyword
151+
node.array.type === NodeType.data_type
152+
? this.showDataType(node.array)
153+
: node.array.type === NodeType.keyword
149154
? this.showKw(node.array)
150155
: this.showIdentifier(node.array)
151156
);
@@ -489,6 +494,10 @@ export default class ExpressionFormatter {
489494
}
490495
}
491496

497+
private formatDataType(node: DataTypeNode) {
498+
this.layout.add(this.showDataType(node), WS.SPACE);
499+
}
500+
492501
private showKw(node: KeywordNode): string {
493502
if (isTabularToken(node.tokenType)) {
494503
return toTabularFormat(this.showNonTabularKw(node), this.cfg.indentStyle);
@@ -523,4 +532,15 @@ export default class ExpressionFormatter {
523532
}
524533
}
525534
}
535+
536+
private showDataType(node: DataTypeNode): string {
537+
switch (this.cfg.dataTypeCase) {
538+
case 'preserve':
539+
return equalizeWhitespace(node.raw);
540+
case 'upper':
541+
return node.text;
542+
case 'lower':
543+
return node.text.toLowerCase();
544+
}
545+
}
526546
}

src/lexer/Tokenizer.ts

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
import { Token, TokenType } from './token.js';
2-
import * as regex from './regexFactory.js';
3-
import { ParamTypes, TokenizerOptions } from './TokenizerOptions.js';
4-
import TokenizerEngine, { TokenRule } from './TokenizerEngine.js';
5-
import { escapeRegExp, patternToRegex } from './regexUtil.js';
61
import { equalizeWhitespace, Optional } from '../utils.js';
72
import { NestedComment } from './NestedComment.js';
3+
import * as regex from './regexFactory.js';
4+
import { escapeRegExp, patternToRegex } from './regexUtil.js';
5+
import { Token, TokenType } from './token.js';
6+
import TokenizerEngine, { TokenRule } from './TokenizerEngine.js';
7+
import { ParamTypes, TokenizerOptions } from './TokenizerOptions.js';
88

99
type OptionalTokenRule = Optional<TokenRule, 'regex'>;
1010

@@ -130,6 +130,11 @@ export default class Tokenizer {
130130
regex: regex.reservedWord(cfg.reservedFunctionNames, cfg.identChars),
131131
text: toCanonical,
132132
},
133+
{
134+
type: TokenType.RESERVED_DATA_TYPE,
135+
regex: regex.reservedWord(cfg.reservedDataTypes ?? [], cfg.identChars),
136+
text: toCanonical,
137+
},
133138
{
134139
type: TokenType.RESERVED_KEYWORD,
135140
regex: regex.reservedWord(cfg.reservedKeywords, cfg.identChars),

src/lexer/TokenizerOptions.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ export interface TokenizerOptions {
6969
reservedPhrases?: string[];
7070
// built in function names
7171
reservedFunctionNames: string[];
72+
// data types
73+
reservedDataTypes?: string[];
7274
// all other reserved words (not included to any of the above lists)
7375
reservedKeywords: string[];
7476
// Types of quotes to use for strings

src/lexer/disambiguateTokens.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,11 @@ const keywordToArrayKeyword = (token: Token, i: number, tokens: Token[]): Token
5757
if (nextToken && isOpenBracket(nextToken)) {
5858
return { ...token, type: TokenType.ARRAY_KEYWORD };
5959
}
60+
} else if (token.type === TokenType.RESERVED_DATA_TYPE) {
61+
const nextToken = nextNonCommentToken(tokens, i);
62+
if (nextToken && isOpenBracket(nextToken)) {
63+
return { ...token, type: TokenType.ARRAY_DATA_TYPE };
64+
}
6065
}
6166
return token;
6267
};

src/lexer/token.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ export enum TokenType {
44
IDENTIFIER = 'IDENTIFIER',
55
STRING = 'STRING',
66
VARIABLE = 'VARIABLE',
7+
RESERVED_DATA_TYPE = 'RESERVED_DATA_TYPE',
78
RESERVED_KEYWORD = 'RESERVED_KEYWORD',
89
RESERVED_FUNCTION_NAME = 'RESERVED_FUNCTION_NAME',
910
RESERVED_PHRASE = 'RESERVED_PHRASE',
@@ -12,6 +13,7 @@ export enum TokenType {
1213
RESERVED_SELECT = 'RESERVED_SELECT',
1314
RESERVED_JOIN = 'RESERVED_JOIN',
1415
ARRAY_IDENTIFIER = 'ARRAY_IDENTIFIER', // IDENTIFIER token in front of [
16+
ARRAY_DATA_TYPE = 'ARRAY_DATA_TYPE', // RESERVED_DATA_TYPE token in front of [
1517
ARRAY_KEYWORD = 'ARRAY_KEYWORD', // RESERVED_KEYWORD token in front of [
1618
CASE = 'CASE',
1719
END = 'END',
@@ -73,23 +75,25 @@ export const testToken =
7375

7476
/** Util object that allows for easy checking of Reserved Keywords */
7577
export const isToken = {
76-
ARRAY: testToken({ text: 'ARRAY', type: TokenType.RESERVED_KEYWORD }),
78+
ARRAY: testToken({ text: 'ARRAY', type: TokenType.RESERVED_DATA_TYPE }),
7779
BY: testToken({ text: 'BY', type: TokenType.RESERVED_KEYWORD }),
7880
SET: testToken({ text: 'SET', type: TokenType.RESERVED_CLAUSE }),
79-
STRUCT: testToken({ text: 'STRUCT', type: TokenType.RESERVED_KEYWORD }),
81+
STRUCT: testToken({ text: 'STRUCT', type: TokenType.RESERVED_DATA_TYPE }),
8082
WINDOW: testToken({ text: 'WINDOW', type: TokenType.RESERVED_CLAUSE }),
8183
VALUES: testToken({ text: 'VALUES', type: TokenType.RESERVED_CLAUSE }),
8284
};
8385

8486
/** Checks if token is any Reserved Keyword or Clause */
8587
export const isReserved = (type: TokenType): boolean =>
88+
type === TokenType.RESERVED_DATA_TYPE ||
8689
type === TokenType.RESERVED_KEYWORD ||
8790
type === TokenType.RESERVED_FUNCTION_NAME ||
8891
type === TokenType.RESERVED_PHRASE ||
8992
type === TokenType.RESERVED_CLAUSE ||
9093
type === TokenType.RESERVED_SELECT ||
9194
type === TokenType.RESERVED_SET_OPERATION ||
9295
type === TokenType.RESERVED_JOIN ||
96+
type === TokenType.ARRAY_DATA_TYPE ||
9397
type === TokenType.ARRAY_KEYWORD ||
9498
type === TokenType.CASE ||
9599
type === TokenType.END ||

src/parser/ast.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ export enum NodeType {
1717
literal = 'literal',
1818
identifier = 'identifier',
1919
keyword = 'keyword',
20+
data_type = 'data_type',
2021
parameter = 'parameter',
2122
operator = 'operator',
2223
comma = 'comma',
@@ -56,7 +57,7 @@ export interface FunctionCallNode extends BaseNode {
5657
// <ident>[<expr>]
5758
export interface ArraySubscriptNode extends BaseNode {
5859
type: NodeType.array_subscript;
59-
array: IdentifierNode | KeywordNode;
60+
array: IdentifierNode | KeywordNode | DataTypeNode;
6061
parenthesis: ParenthesisNode;
6162
}
6263

@@ -129,6 +130,12 @@ export interface IdentifierNode extends BaseNode {
129130
text: string;
130131
}
131132

133+
export interface DataTypeNode extends BaseNode {
134+
type: NodeType.data_type;
135+
text: string;
136+
raw: string;
137+
}
138+
132139
export interface KeywordNode extends BaseNode {
133140
type: NodeType.keyword;
134141
tokenType: TokenType;
@@ -180,6 +187,7 @@ export type AstNode =
180187
| AllColumnsAsteriskNode
181188
| LiteralNode
182189
| IdentifierNode
190+
| DataTypeNode
183191
| KeywordNode
184192
| ParameterNode
185193
| OperatorNode

src/parser/grammar.ne

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
@preprocessor typescript
22
@{%
33
import LexerAdapter from './LexerAdapter.js';
4-
import { NodeType, AstNode, CommentNode, KeywordNode, IdentifierNode } from './ast.js';
4+
import { NodeType, AstNode, CommentNode, KeywordNode, IdentifierNode, DataTypeNode } from './ast.js';
55
import { Token, TokenType } from '../lexer/token.js';
66

77
// The lexer here is only to provide the has() method,
@@ -23,6 +23,12 @@ const toKeywordNode = (token: Token): KeywordNode => ({
2323
raw: token.raw,
2424
});
2525

26+
const toDataTypeNode = (token: Token): DataTypeNode => ({
27+
type: NodeType.data_type,
28+
text: token.text,
29+
raw: token.raw,
30+
});
31+
2632
interface CommentAttachments {
2733
leading?: CommentNode[];
2834
trailing?: CommentNode[];
@@ -197,6 +203,7 @@ atomic_expression ->
197203
| identifier
198204
| parameter
199205
| literal
206+
| data_type
200207
| keyword ) {% unwrap %}
201208

202209
array_subscript -> %ARRAY_IDENTIFIER _ square_brackets {%
@@ -206,6 +213,13 @@ array_subscript -> %ARRAY_IDENTIFIER _ square_brackets {%
206213
parenthesis: brackets,
207214
})
208215
%}
216+
array_subscript -> %ARRAY_DATA_TYPE _ square_brackets {%
217+
([arrayToken, _, brackets]) => ({
218+
type: NodeType.array_subscript,
219+
array: addComments(toDataTypeNode(arrayToken), { trailing: _ }),
220+
parenthesis: brackets,
221+
})
222+
%}
209223
array_subscript -> %ARRAY_KEYWORD _ square_brackets {%
210224
([arrayToken, _, brackets]) => ({
211225
type: NodeType.array_subscript,
@@ -329,6 +343,11 @@ keyword ->
329343
([[token]]) => toKeywordNode(token)
330344
%}
331345

346+
data_type ->
347+
( %RESERVED_DATA_TYPE ) {%
348+
([[token]]) => toDataTypeNode(token)
349+
%}
350+
332351
logic_operator ->
333352
( %AND
334353
| %OR

src/sqlFormatter.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import * as allDialects from './allDialects.js';
2-
3-
import { FormatOptions } from './FormatOptions.js';
42
import { createDialect, DialectOptions } from './dialect.js';
3+
import { FormatOptions } from './FormatOptions.js';
54
import Formatter from './formatter/Formatter.js';
65
import { ConfigError, validateConfig } from './validateConfig.js';
76

@@ -42,6 +41,7 @@ const defaultOptions: FormatOptions = {
4241
useTabs: false,
4342
keywordCase: 'preserve',
4443
identifierCase: 'preserve',
44+
dataTypeCase: 'preserve',
4545
indentStyle: 'standard',
4646
logicalOperatorNewline: 'before',
4747
expressionWidth: 50,

test/snowflake.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,12 +167,12 @@ describe('SnowflakeFormatter', () => {
167167
`);
168168
});
169169

170-
it('detects data types as keywords', () => {
170+
it('detects data types as data types', () => {
171171
expect(
172172
format(
173173
`CREATE TABLE tbl (first_column double Precision, second_column numBer (38, 0), third String);`,
174174
{
175-
keywordCase: 'upper',
175+
dataTypeCase: 'upper',
176176
}
177177
)
178178
).toBe(dedent`

0 commit comments

Comments (0)