diff --git a/.gitignore b/.gitignore
index c6bba59..25eac54 100644
--- a/.gitignore
+++ b/.gitignore
@@ -128,3 +128,6 @@ dist
 .yarn/build-state.yml
 .yarn/install-state.gz
 .pnp.*
+
+# Editor settings
+.idea
diff --git a/README.md b/README.md
index 72e33a0..b96e508 100644
--- a/README.md
+++ b/README.md
@@ -31,3 +31,16 @@ npm run build
 ```bash
 npm test
 ```
+
+### Formatting and Linting
+
+```bash
+# This is just to check if the code is formatted
+npm run format:check
+
+# Automatically format code
+npm run format
+
+# Lint
+npm run lint
+```
diff --git a/src/schema/arrow.ts b/src/schema/arrow.ts
new file mode 100644
index 0000000..c36b03a
--- /dev/null
+++ b/src/schema/arrow.ts
@@ -0,0 +1,14 @@
+// Metadata keys for column constraints; column.ts stores the unique,
+// primary_key and incremental flags on Arrow fields under these keys.
+export const METADATA_UNIQUE = 'cq:extension:unique';
+export const METADATA_PRIMARY_KEY = 'cq:extension:primary_key';
+export const METADATA_CONSTRAINT_NAME = 'cq:extension:constraint_name';
+export const METADATA_INCREMENTAL = 'cq:extension:incremental';
+
+// String values used for boolean metadata flags.
+export const METADATA_TRUE = 'true';
+export const METADATA_FALSE = 'false';
+
+// Metadata keys for a table's name and description.
+export const METADATA_TABLE_NAME = 'cq:table_name';
+export const METADATA_TABLE_DESCRIPTION = 'cq:table_description';
diff --git a/src/schema/column.ts b/src/schema/column.ts
new file mode 100644
index 0000000..7bf6a4e
--- /dev/null
+++ b/src/schema/column.ts
@@ -0,0 +1,96 @@
+import { DataType, Field } from '@apache-arrow/esnext-esm';
+
+import * as arrow from './arrow.js';
+
+/**
+ * A table column: a name, an Arrow data type, a description and constraint flags.
+ *
+ * Converts to and from an Arrow `Field`; the primary-key, unique and
+ * incremental flags travel in the field's metadata map.
+ */
+export class Column {
+  name: string;
+  type: DataType;
+  description: string;
+  primary_key: boolean;
+  not_null: boolean;
+  incremental_key: boolean;
+  unique: boolean;
+
+  constructor(
+    name: string,
+    type: DataType,
+    description: string = '',
+    primary_key: boolean = false,
+    not_null: boolean = false,
+    incremental_key: boolean = false,
+    unique: boolean = false,
+  ) {
+    this.name = name;
+    this.type = type;
+    this.description = description;
+    this.primary_key = primary_key;
+    this.not_null = not_null;
+    this.incremental_key = incremental_key;
+    this.unique = unique;
+  }
+
+  /** Returns a one-line summary listing every property. */
+  toString(): string {
+    return `Column(name=${this.name}, type=${this.type}, description=${this.description}, primary_key=${this.primary_key}, not_null=${this.not_null}, incremental_key=${this.incremental_key}, unique=${this.unique})`;
+  }
+
+  /** Makes `JSON.stringify` emit the `toString()` summary for this column. */
+  toJSON(): string {
+    return this.toString();
+  }
+
+  /**
+   * Returns true when `value` is a `Column` whose properties all match.
+   *
+   * NOTE(review): `type` is compared by reference, so two separately
+   * constructed instances of the same Arrow type compare as different;
+   * confirm this is intended.
+   */
+  equals(value: object): boolean {
+    if (value instanceof Column) {
+      return (
+        this.name === value.name &&
+        this.type === value.type &&
+        this.description === value.description &&
+        this.primary_key === value.primary_key &&
+        this.not_null === value.not_null &&
+        this.incremental_key === value.incremental_key &&
+        this.unique === value.unique
+      );
+    }
+    return false;
+  }
+
+  /**
+   * Builds the Arrow `Field` for this column. The field is nullable unless
+   * `not_null` is set; `description` is not written to the field.
+   */
+  toArrowField(): Field {
+    const metadataMap = new Map<string, string>();
+    metadataMap.set(arrow.METADATA_PRIMARY_KEY, this.primary_key ? arrow.METADATA_TRUE : arrow.METADATA_FALSE);
+    metadataMap.set(arrow.METADATA_UNIQUE, this.unique ? arrow.METADATA_TRUE : arrow.METADATA_FALSE);
+    metadataMap.set(arrow.METADATA_INCREMENTAL, this.incremental_key ? arrow.METADATA_TRUE : arrow.METADATA_FALSE);
+
+    return new Field(this.name, this.type, /*nullable=*/ !this.not_null, metadataMap);
+  }
+
+  /**
+   * Rebuilds a `Column` from an Arrow `Field`. The description is always
+   * empty, and a flag whose metadata key is absent is read as `false`.
+   */
+  static fromArrowField(field: Field): Column {
+    const metadata = field.metadata;
+    const primary_key = metadata.get(arrow.METADATA_PRIMARY_KEY) === arrow.METADATA_TRUE;
+    const unique = metadata.get(arrow.METADATA_UNIQUE) === arrow.METADATA_TRUE;
+    const incremental_key = metadata.get(arrow.METADATA_INCREMENTAL) === arrow.METADATA_TRUE;
+
+    // The constructor takes incremental_key before unique.
+    return new Column(field.name, field.type, '', primary_key, !field.nullable, incremental_key, unique);
+  }
+}
diff --git a/src/transformers/openapi.test.ts b/src/transformers/openapi.test.ts
new file mode 100644
index 0000000..62937d5
--- /dev/null
+++ b/src/transformers/openapi.test.ts
@@ -0,0 +1,62 @@
+import { DataType, Utf8, Int64, Bool } from '@apache-arrow/esnext-esm';
+import test from 'ava';
+
+import { Column } from '../schema/column.js';
+import { JSONType } from '../types/json.js';
+
+import { oapiDefinitionToColumns } from './openapi.js';
+
+// Swagger 2.0 fixture whose single definition has one property per case under test.
+const OAPI_SPEC = {
+  swagger: '2.0',
+  info: {
+    version: '2.0',
+    title: 'Test API',
+    description: 'Unit tests APIs',
+  },
+  host: 'cloudquery.io',
+  schemes: ['https'],
+  consumes: ['application/json'],
+  produces: ['application/json'],
+  paths: {},
+  definitions: {
+    TestDefinition: {
+      type: 'object',
+      properties: {
+        string: {
+          type: 'string',
+        },
+        number: {
+          type: 'number',
+        },
+        integer: {
+          type: 'integer',
+        },
+        boolean: {
+          type: 'boolean',
+        },
+        object: {
+          $ref: '#/definitions/SomeDefinition',
+        },
+        array: {
+          type: 'array',
+          items: { $ref: '#/definitions/SomeDefinition' },
+        },
+      },
+    },
+  },
+};
+
+test('should parse spec as expected', (t) => {
+  const expectedColumns: Column[] = [
+    new Column('string', new Utf8(), ''),
+    new Column('number', new Int64(), ''),
+    new Column('integer', new Int64(), ''),
+    new Column('boolean', new Bool(), ''),
+    new Column('object', new JSONType(), ''),
+    new Column('array', new JSONType(), ''),
+  ];
+
+  const columns = oapiDefinitionToColumns(OAPI_SPEC['definitions']['TestDefinition']);
+  t.deepEqual(columns, expectedColumns);
+});
diff --git a/src/transformers/openapi.ts b/src/transformers/openapi.ts
new file mode 100644
index 0000000..7f44071
--- /dev/null
+++ b/src/transformers/openapi.ts
@@ -0,0 +1,80 @@
+import { DataType, Field, Utf8, Int64, Bool } from '@apache-arrow/esnext-esm';
+
+import { Column } from '../schema/column.js';
+import { JSONType } from '../types/json.js';
+
+/** The subset of an OpenAPI property that this module reads. */
+interface OAPIProperty {
+  type?: string;
+  description?: string;
+  $ref?: string;
+  items?: {
+    $ref: string;
+  };
+}
+
+/** An OpenAPI definition, reduced to its property map. */
+interface OAPIDefinition {
+  properties: {
+    [key: string]: OAPIProperty;
+  };
+}
+
+/**
+ * Maps an OpenAPI property to the Arrow type used for its column.
+ *
+ * A property with no `type` but a `$ref` key is stored as JSON; any other
+ * unrecognised or missing type falls back to Utf8.
+ */
+function oapiTypeToArrowType(field: OAPIProperty): DataType {
+  const oapiType = field.type;
+  switch (oapiType) {
+    case 'string': {
+      return new Utf8();
+    }
+    // NOTE(review): 'number' shares Int64 with 'integer'; confirm that
+    // fractional values are not expected here.
+    case 'number':
+    case 'integer': {
+      return new Int64();
+    }
+    case 'boolean': {
+      return new Bool();
+    }
+    case 'array':
+    case 'object': {
+      return new JSONType();
+    }
+    default: {
+      return !oapiType && '$ref' in field ? new JSONType() : new Utf8();
+    }
+  }
+}
+
+/** Returns the first column named `name`, or `undefined` when none matches. */
+export function getColumnByName(columns: Column[], name: string): Column | undefined {
+  return columns.find((column) => column.name === name);
+}
+
+/**
+ * Builds one column per property of `definition`, in property order.
+ *
+ * When `overrideColumns` holds a column with the same name, its `type`,
+ * `primary_key` and `unique` replace the derived values; its other fields
+ * are not copied.
+ */
+export function oapiDefinitionToColumns(definition: OAPIDefinition, overrideColumns: Column[] = []): Column[] {
+  const columns: Column[] = [];
+  for (const [key, value] of Object.entries(definition.properties)) {
+    const columnType = oapiTypeToArrowType(value);
+    const column = new Column(key, columnType, value.description);
+    const overrideColumn = getColumnByName(overrideColumns, key);
+    if (overrideColumn) {
+      column.type = overrideColumn.type;
+      column.primary_key = overrideColumn.primary_key;
+      column.unique = overrideColumn.unique;
+    }
+    columns.push(column);
+  }
+  return columns;
+}
diff --git a/src/types/json.ts b/src/types/json.ts
new file mode 100644
index 0000000..3a001f9
--- /dev/null
+++ b/src/types/json.ts
@@ -0,0 +1,29 @@
+import { DataType, Binary, Type } from '@apache-arrow/esnext-esm';
+
+/**
+ * Arrow data type marking a column as JSON.
+ *
+ * NOTE(review): `serialize` and `deserialize` are placeholders, and no storage
+ * type is configured yet.
+ */
+export class JSONType extends DataType {
+  readonly extensionName: string = 'json';
+
+  constructor() {
+    super();
+    // Assuming there's no direct way to set the storage type in the constructor,
+    // this is just a representation of the JSONType.
+  }
+
+  /** Placeholder: returns the encoded bytes of the literal 'json-serialized'. */
+  serialize(): ArrayBuffer {
+    // Implement your serialization logic here.
+    return new TextEncoder().encode('json-serialized').buffer;
+  }
+
+  /** Placeholder: ignores both arguments and returns a fresh `JSONType`. */
+  static deserialize(storageType: Binary, serialized: ArrayBuffer): JSONType {
+    // Implement your deserialization logic here.
+    return new JSONType();
+  }
+}
diff --git a/src/types/uuid.ts b/src/types/uuid.ts
new file mode 100644
index 0000000..923f10d
--- /dev/null
+++ b/src/types/uuid.ts
@@ -0,0 +1,30 @@
+import { DataType, Binary, Type } from '@apache-arrow/esnext-esm';
+
+/**
+ * Arrow data type marking a column as a UUID.
+ *
+ * NOTE(review): `serialize` and `deserialize` are placeholders, and no storage
+ * type is configured yet.
+ */
+export class UUIDType extends DataType {
+  readonly extensionName: string = 'uuid';
+
+  constructor() {
+    super();
+    // The underlying storage type is a binary of 16 bytes, representing a UUID.
+    // Assuming there's no direct way to set the storage type in the constructor,
+    // this is just a representation of the UUIDType.
+  }
+
+  /** Placeholder: returns the encoded bytes of the literal 'uuid-serialized'. */
+  serialize(): ArrayBuffer {
+    // Implement your serialization logic here.
+    return new TextEncoder().encode('uuid-serialized').buffer;
+  }
+
+  /** Placeholder: ignores both arguments and returns a fresh `UUIDType`. */
+  static deserialize(storageType: Binary, serialized: ArrayBuffer): UUIDType {
+    // Implement your deserialization logic here.
+    return new UUIDType();
+  }
+}