Skip to content

feat: Transformers #25

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,6 @@ dist
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*

# Editor settings
.idea
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,16 @@ npm run build
```bash
npm test
```

### Formatting and Linting

```bash
# Check whether the code is formatted correctly
npm run format:check

# Automatically format code
npm run format

# Lint
npm run lint
```
9 changes: 9 additions & 0 deletions src/schema/arrow.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Values stored under the boolean metadata keys below.
export const METADATA_TRUE = 'true';
export const METADATA_FALSE = 'false';

// Field-level metadata keys (all share the `cq:extension:` prefix).
export const METADATA_PRIMARY_KEY = 'cq:extension:primary_key';
export const METADATA_UNIQUE = 'cq:extension:unique';
export const METADATA_INCREMENTAL = 'cq:extension:incremental';
export const METADATA_CONSTRAINT_NAME = 'cq:extension:constraint_name';

// Table-level metadata keys.
export const METADATA_TABLE_NAME = 'cq:table_name';
export const METADATA_TABLE_DESCRIPTION = 'cq:table_description';
73 changes: 73 additions & 0 deletions src/schema/column.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import { DataType, Field } from '@apache-arrow/esnext-esm';

import * as arrow from './arrow.js';

/**
 * A single table column: its name, Arrow data type, free-text description
 * and the constraint flags that are carried as Arrow field metadata.
 */
export class Column {
  name: string;
  type: DataType;
  description: string;
  primary_key: boolean;
  not_null: boolean;
  incremental_key: boolean;
  unique: boolean;

  /**
   * @param name - Column name.
   * @param type - Arrow data type of the column.
   * @param description - Optional description; defaults to an empty string.
   * @param primary_key - Whether the column is part of the primary key.
   * @param not_null - Whether the column rejects nulls.
   * @param incremental_key - Whether the column is an incremental key.
   * @param unique - Whether the column carries a unique constraint.
   */
  constructor(
    name: string,
    type: DataType,
    description: string = '',
    primary_key: boolean = false,
    not_null: boolean = false,
    incremental_key: boolean = false,
    unique: boolean = false,
  ) {
    this.name = name;
    this.type = type;
    this.description = description;
    this.primary_key = primary_key;
    this.not_null = not_null;
    this.incremental_key = incremental_key;
    this.unique = unique;
  }

  /** Returns a human-readable, single-line rendering of every field. */
  toString(): string {
    return `Column(name=${this.name}, type=${this.type}, description=${this.description}, primary_key=${this.primary_key}, not_null=${this.not_null}, incremental_key=${this.incremental_key}, unique=${this.unique})`;
  }

  /** JSON.stringify picks this up; it serializes the column as its toString() text. */
  toJSON(): string {
    return this.toString();
  }

  /**
   * Field-by-field equality against another Column.
   *
   * Note that `type` is compared with `===`, i.e. by object identity, so two
   * columns only compare equal when they share the same DataType instance.
   *
   * @returns false for anything that is not a Column instance.
   */
  equals(value: object): boolean {
    if (!(value instanceof Column)) {
      return false;
    }
    return (
      this.name === value.name &&
      this.type === value.type &&
      this.description === value.description &&
      this.primary_key === value.primary_key &&
      this.not_null === value.not_null &&
      this.incremental_key === value.incremental_key &&
      this.unique === value.unique
    );
  }

  /**
   * Converts the column to an Arrow Field. The primary-key, unique and
   * incremental flags are written as 'true'/'false' metadata entries, and
   * `not_null` is expressed through the field's nullability. The description
   * is not stored on the field.
   */
  toArrowField(): Field {
    const asFlag = (flag: boolean): string => (flag ? arrow.METADATA_TRUE : arrow.METADATA_FALSE);

    const metadataMap = new Map<string, string>();
    metadataMap.set(arrow.METADATA_PRIMARY_KEY, asFlag(this.primary_key));
    metadataMap.set(arrow.METADATA_UNIQUE, asFlag(this.unique));
    metadataMap.set(arrow.METADATA_INCREMENTAL, asFlag(this.incremental_key));

    return new Field(this.name, this.type, /*nullable=*/ !this.not_null, metadataMap);
  }

  /**
   * Rebuilds a Column from an Arrow Field produced by {@link toArrowField}.
   * A flag is true only when its metadata entry equals 'true'; a missing
   * entry therefore reads as false. The description is always empty because
   * the field does not carry one.
   */
  static fromArrowField(field: Field): Column {
    const { metadata } = field;
    const isSet = (key: string): boolean => metadata.get(key) === arrow.METADATA_TRUE;

    const primary_key = isSet(arrow.METADATA_PRIMARY_KEY);
    const unique = isSet(arrow.METADATA_UNIQUE);
    const incremental_key = isSet(arrow.METADATA_INCREMENTAL);

    // Constructor order is (..., not_null, incremental_key, unique): the
    // incremental flag must come before the unique flag.
    return new Column(field.name, field.type, '', primary_key, !field.nullable, incremental_key, unique);
  }
}
61 changes: 61 additions & 0 deletions src/transformers/openapi.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import { DataType, Utf8, Int64, Bool } from '@apache-arrow/esnext-esm';
import test from 'ava';

import { Column } from '../schema/column.js';
import { JSONType } from '../types/json.js';

import { oapiDefinitionToColumns } from './openapi.js';

// Reference target shared by the `object` and `array` properties of the fixture.
const SOME_DEFINITION_REF = '#/definitions/SomeDefinition';

// Minimal Swagger 2.0 document with one definition that has a property for
// each OpenAPI type the transformer distinguishes.
const OAPI_SPEC = {
  swagger: '2.0',
  info: { version: '2.0', title: 'Test API', description: 'Unit tests APIs' },
  host: 'cloudquery.io',
  schemes: ['https'],
  consumes: ['application/json'],
  produces: ['application/json'],
  paths: {},
  definitions: {
    TestDefinition: {
      type: 'object',
      properties: {
        string: { type: 'string' },
        number: { type: 'number' },
        integer: { type: 'integer' },
        boolean: { type: 'boolean' },
        object: { $ref: SOME_DEFINITION_REF },
        array: { type: 'array', items: { $ref: SOME_DEFINITION_REF } },
      },
    },
  },
};

// Every property of TestDefinition should map to a column of the matching
// Arrow type, in declaration order, with an empty description.
test('should parse spec as expected', (t) => {
  const actual = oapiDefinitionToColumns(OAPI_SPEC.definitions.TestDefinition);

  const expected: Column[] = [
    new Column('string', new Utf8(), ''),
    new Column('number', new Int64(), ''),
    new Column('integer', new Int64(), ''),
    new Column('boolean', new Bool(), ''),
    new Column('object', new JSONType(), ''),
    new Column('array', new JSONType(), ''),
  ];

  t.deepEqual(actual, expected);
});
68 changes: 68 additions & 0 deletions src/transformers/openapi.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import { DataType, Field, Utf8, Int64, Bool } from '@apache-arrow/esnext-esm';

import { Column } from '../schema/column.js';
import { JSONType } from '../types/json.js';

/**
 * The subset of an OpenAPI property schema that this module reads.
 * Every member is optional.
 */
interface OAPIProperty {
// OpenAPI type name; oapiTypeToArrowType switches on this value.
type?: string;
// Copied into the generated column's description when present.
description?: string;
// Reference to another definition; only its presence is checked in this file.
$ref?: string;
// Item schema for array properties; not read by the code in this file.
items?: {
$ref: string;
};
}

/**
 * An OpenAPI definition as consumed by oapiDefinitionToColumns: only the
 * `properties` map is required.
 */
interface OAPIDefinition {
// Property name -> property schema; each entry becomes one column.
properties: {
[key: string]: OAPIProperty;
};
}

/**
 * Picks the Arrow data type for an OpenAPI property.
 *
 * - 'string' -> Utf8
 * - 'number' / 'integer' -> Int64
 * - 'boolean' -> Bool
 * - 'array' / 'object' -> JSONType
 * - no type but a `$ref` key -> JSONType
 * - anything else -> Utf8
 */
function oapiTypeToArrowType(field: OAPIProperty): DataType {
  const { type } = field;

  if (type === 'string') {
    return new Utf8();
  }
  if (type === 'number' || type === 'integer') {
    return new Int64();
  }
  if (type === 'boolean') {
    return new Bool();
  }
  if (type === 'array' || type === 'object') {
    return new JSONType();
  }

  // Untyped properties that carry a `$ref` key are stored as JSON; every
  // other unrecognised case falls back to a string column.
  const isBareReference = !type && '$ref' in field;
  if (isBareReference) {
    return new JSONType();
  }
  return new Utf8();
}

/**
 * Looks up a column by exact name.
 *
 * @returns the first column whose `name` equals `name`, or `undefined` when
 * there is no such column.
 */
export function getColumnByName(columns: Column[], name: string): Column | undefined {
  return columns.find((candidate) => candidate.name === name);
}

/**
 * Builds one Column per property of an OpenAPI definition.
 *
 * Each column takes the property name, the Arrow type chosen by
 * oapiTypeToArrowType and the property's description (empty when absent).
 * Only the definition's own properties are considered; enumerable keys
 * inherited through the prototype chain are ignored.
 *
 * @param definition - Definition whose `properties` are converted.
 * @param overrideColumns - Columns matched by name against the generated
 *   ones. On a match, `type`, `primary_key` and `unique` are copied from the
 *   override; all other fields keep their generated values.
 * @returns the generated columns, in property order.
 */
export function oapiDefinitionToColumns(definition: OAPIDefinition, overrideColumns: Column[] = []): Column[] {
  return Object.entries(definition.properties).map(([name, property]) => {
    const column = new Column(name, oapiTypeToArrowType(property), property.description);

    const override = getColumnByName(overrideColumns, name);
    if (override) {
      column.type = override.type;
      column.primary_key = override.primary_key;
      column.unique = override.unique;
    }
    return column;
  });
}
21 changes: 21 additions & 0 deletions src/types/json.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import { DataType, Binary, Type } from '@apache-arrow/esnext-esm';

/**
 * Arrow data type used to mark columns that hold JSON values. It is declared
 * on top of the Binary type id and exposes the extension name 'json'.
 *
 * serialize/deserialize are placeholders: no real payload is encoded or
 * decoded yet.
 */
export class JSONType extends DataType<Type.Binary> {
  readonly extensionName: string = 'json';

  constructor() {
    // Nothing is forwarded to the base class; the storage type is not
    // configured here.
    super();
  }

  /** Placeholder: returns the buffer holding the UTF-8 bytes of a fixed marker string. */
  serialize(): ArrayBuffer {
    const marker = new TextEncoder().encode('json-serialized');
    return marker.buffer;
  }

  /** Placeholder: ignores both arguments and returns a fresh JSONType. */
  static deserialize(storageType: Binary, serialized: ArrayBuffer): JSONType {
    return new JSONType();
  }
}
22 changes: 22 additions & 0 deletions src/types/uuid.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import { DataType, Binary, Type } from '@apache-arrow/esnext-esm';

/**
 * Arrow data type used to mark columns that hold UUID values. It is declared
 * on top of the Binary type id and exposes the extension name 'uuid'.
 *
 * serialize/deserialize are placeholders: no real payload is encoded or
 * decoded yet.
 */
export class UUIDType extends DataType<Type.Binary> {
  readonly extensionName: string = 'uuid';

  constructor() {
    // The intended storage is a 16-byte binary value, but nothing is
    // forwarded to the base class; the storage type is not configured here.
    super();
  }

  /** Placeholder: returns the buffer holding the UTF-8 bytes of a fixed marker string. */
  serialize(): ArrayBuffer {
    const marker = new TextEncoder().encode('uuid-serialized');
    return marker.buffer;
  }

  /** Placeholder: ignores both arguments and returns a fresh UUIDType. */
  static deserialize(storageType: Binary, serialized: ArrayBuffer): UUIDType {
    return new UUIDType();
  }
}