diff --git a/src/scalar/json.ts b/src/scalar/json.ts index 5a23504..4a941de 100644 --- a/src/scalar/json.ts +++ b/src/scalar/json.ts @@ -12,9 +12,9 @@ const validate = (value: string) => { } }; -class JSONType implements Scalar { +class JSONType implements Scalar { private _valid = false; - private _value = ''; + private _value = new TextEncoder().encode(NULL_VALUE); public constructor(v?: unknown) { this.value = v; @@ -29,7 +29,7 @@ class JSONType implements Scalar { return this._valid; } - public get value(): string { + public get value(): Uint8Array { return this._value; } @@ -40,14 +40,14 @@ class JSONType implements Scalar { } if (typeof value === 'string') { - this._value = value; + this._value = new TextEncoder().encode(value); this._valid = validate(value); return; } if (value instanceof Uint8Array) { - this._value = new TextDecoder().decode(value); - this._valid = validate(this._value); + this._value = value; + this._valid = validate(new TextDecoder().decode(value)); return; } @@ -58,7 +58,7 @@ class JSONType implements Scalar { } try { - this._value = JSON.stringify(value); + this._value = new TextEncoder().encode(JSON.stringify(value)); this._valid = true; } catch { throw new Error(`Unable to set '${value}' as JSON`); @@ -67,7 +67,7 @@ class JSONType implements Scalar { public toString() { if (this._valid) { - return this._value; + return new TextDecoder().decode(this._value); } return NULL_VALUE; diff --git a/src/scalar/uuid.ts b/src/scalar/uuid.ts index 41a1db2..e87d008 100644 --- a/src/scalar/uuid.ts +++ b/src/scalar/uuid.ts @@ -1,27 +1,26 @@ -import { Utf8 as ArrowString } from '@apache-arrow/esnext-esm'; +import { FixedSizeBinary } from '@apache-arrow/esnext-esm'; import { validate } from 'uuid'; import { Scalar } from './scalar.js'; import { isInvalid, NULL_VALUE } from './util.js'; -export class UUID implements Scalar { +export class UUID implements Scalar { private _valid = false; - private _value = ''; + private _value = new TextEncoder().encode(NULL_VALUE); public constructor(v?: unknown) { this.value = v; - return this; } public get dataType() { - return new ArrowString(); + return new FixedSizeBinary(16); } public get valid(): boolean { return this._valid; } - public get value(): string { + public get value(): Uint8Array { return this._value; } @@ -32,14 +31,14 @@ export class UUID implements Scalar { } if (typeof value === 'string') { - this._value = value; + this._value = new TextEncoder().encode(value); this._valid = validate(value); return; } if (value instanceof Uint8Array) { - this._value = new TextDecoder().decode(value); - this._valid = validate(this._value); + this._value = value; + this._valid = validate(new TextDecoder().decode(value)); return; } @@ -50,8 +49,8 @@ export class UUID implements Scalar { } if (typeof value!.toString === 'function' && value!.toString !== Object.prototype.toString) { - this._value = value!.toString(); - this._valid = validate(this._value); + this._value = Buffer.from(value!.toString()); + this._valid = validate(value!.toString()); return; } @@ -60,7 +59,7 @@ export class UUID implements Scalar { public toString() { if (this._valid) { - return this._value; + return new TextDecoder().decode(this._value); } return NULL_VALUE; diff --git a/src/scheduler/cqid.test.ts b/src/scheduler/cqid.test.ts index bd7f778..44bae2c 100644 --- a/src/scheduler/cqid.test.ts +++ b/src/scheduler/cqid.test.ts @@ -27,8 +27,9 @@ test('setCQId - should set to random value if deterministicCQId is false', (t): setCQId(resource, false, () => NIL_UUID); - t.is(resource.getColumnData(cqIDColumn.name).valid, true); - t.is(resource.getColumnData(cqIDColumn.name).value.toString(), NIL_UUID); + const cqId = resource.getColumnData(cqIDColumn.name); + t.is(cqId.valid, true); + t.is(cqId.toString(), NIL_UUID); }); test('setCQId - should set to random value if deterministicCQId is true and table does not have non _cq_id PKs', (t): void => { @@ -50,8 +51,9 @@ test('setCQId - should set to random value if deterministicCQId is true and tabl setCQId(resource, true, () => NIL_UUID); - t.is(resource.getColumnData(cqIDColumn.name).valid, true); - t.is(resource.getColumnData(cqIDColumn.name).value.toString(), NIL_UUID); + const cqId = resource.getColumnData(cqIDColumn.name); + t.is(cqId.valid, true); + t.is(cqId.toString(), NIL_UUID); }); test('setCQId - should set to fixed value if deterministicCQId is true and table has non _cq_id PKs', (t): void => { @@ -78,6 +80,7 @@ test('setCQId - should set to fixed value if deterministicCQId is true and table setCQId(resource, true); - t.is(resource.getColumnData(cqIDColumn.name).valid, true); - t.is(resource.getColumnData(cqIDColumn.name).value.toString(), '415bd5dd-9bac-5806-b9d1-c53f17d37455'); + const cqId = resource.getColumnData(cqIDColumn.name); + t.is(cqId.valid, true); + t.is(cqId.toString(), '415bd5dd-9bac-5806-b9d1-c53f17d37455'); }); diff --git a/src/schema/arrow.ts b/src/schema/arrow.ts index 1c63e23..d27d8d6 100644 --- a/src/schema/arrow.ts +++ b/src/schema/arrow.ts @@ -9,3 +9,6 @@ export const METADATA_TABLE_NAME = 'cq:table_name'; export const METADATA_TABLE_TITLE = 'cq:table_title'; export const METADATA_TABLE_DESCRIPTION = 'cq:table_description'; export const METADATA_TABLE_DEPENDS_ON = 'cq:table_depends_on'; + +export const METADATA_ARROW_EXTENSION_NAME = 'ARROW:extension:name'; +export const METADATA_ARROW_EXTENSION_METADATA = 'ARROW:extension:metadata'; diff --git a/src/schema/column.ts b/src/schema/column.ts index 4225c66..186944d 100644 --- a/src/schema/column.ts +++ b/src/schema/column.ts @@ -2,6 +2,8 @@ import { isDeepStrictEqual } from 'node:util'; import { DataType, Field, Utf8 } from '@apache-arrow/esnext-esm'; +import { ExtensionType, isExtensionType } from '../types/extensions.js'; + import * as arrow from './arrow.js'; import { ClientMeta } from './meta.js'; import { Resource } from './resource.js'; @@ -60,7 +62,13 @@ export const toArrowField = (column: Column): Field => { metadataMap.set(arrow.METADATA_UNIQUE, unique ? arrow.METADATA_TRUE : arrow.METADATA_FALSE); metadataMap.set(arrow.METADATA_INCREMENTAL, incrementalKey ? arrow.METADATA_TRUE : arrow.METADATA_FALSE); - return new Field(name, type, /*nullable=*/ !notNull, metadataMap); + if (isExtensionType(type)) { + const { name, metadata } = type as unknown as ExtensionType; + metadataMap.set(arrow.METADATA_ARROW_EXTENSION_NAME, name); + metadataMap.set(arrow.METADATA_ARROW_EXTENSION_METADATA, metadata); + } + + return new Field(name, type, !notNull, metadataMap); }; export const fromArrowField = (field: Field): Column => { diff --git a/src/schema/meta.test.ts b/src/schema/meta.test.ts index 08afbfa..71c6267 100644 --- a/src/schema/meta.test.ts +++ b/src/schema/meta.test.ts @@ -95,6 +95,7 @@ test('parentCqUUIDResolver - should set to _cq_id column value when parent has i parentCqUUIDResolver()({ id: () => '' }, childResource, cqParentIDColumn); - t.is(childResource.getColumnData(cqParentIDColumn.name).value, '9241a9cb-f580-420f-8fd7-46d2c4f55ccb'); - t.is(childResource.getColumnData(cqParentIDColumn.name).valid, true); + const cqParentId = childResource.getColumnData(cqParentIDColumn.name); + t.is(cqParentId.valid, true); + t.is(cqParentId.toString(), '9241a9cb-f580-420f-8fd7-46d2c4f55ccb'); }); diff --git a/src/schema/resource.ts b/src/schema/resource.ts index 3d70f02..5c90a84 100644 --- a/src/schema/resource.ts +++ b/src/schema/resource.ts @@ -1,6 +1,7 @@ import { tableToIPC, Table as ArrowTable, RecordBatch, vectorFromArray } from '@apache-arrow/esnext-esm'; import { Scalar, Vector, newScalar, Stringable } from '../scalar/scalar.js'; +import { isExtensionType } from '../types/extensions.js'; import { cqIDColumn } from './meta.js'; import { Table, toArrowSchema } from './table.js'; @@ -59,7 +60,11 @@ export const encodeResource = (resource: Resource): Uint8Array => { let batch = new RecordBatch(schema, undefined); for (let index = 0; index < table.columns.length; index++) { const column = table.columns[index]; - const data = resource.getColumnData(column.name); + // For extension types, we need to get the underlying value + const data = isExtensionType(column.type) + ? resource.getColumnData(column.name).value + : resource.getColumnData(column.name); + const vector = vectorFromArray([data], column.type); batch = batch.setChildAt(index, vector); } diff --git a/src/types/extensions.ts b/src/types/extensions.ts new file mode 100644 index 0000000..8e032ed --- /dev/null +++ b/src/types/extensions.ts @@ -0,0 +1,13 @@ +import { DataType } from '@apache-arrow/esnext-esm'; + +import { JSONType } from './json.js'; +import { UUIDType } from './uuid.js'; + +export interface ExtensionType { + get name(): string; + get metadata(): string; +} + +const extensions = [JSONType, UUIDType]; + +export const isExtensionType = (type: DataType) => extensions.some((extension) => type instanceof extension); diff --git a/src/types/json.ts b/src/types/json.ts index ab0cb4e..5396ac5 100644 --- a/src/types/json.ts +++ b/src/types/json.ts @@ -1,13 +1,16 @@ -import { DataType, Type } from '@apache-arrow/esnext-esm'; +import { Type, DataType } from '@apache-arrow/esnext-esm'; -export class JSONType extends DataType { - readonly extensionName: string = 'json'; +import { ExtensionType } from './extensions.js'; - constructor() { - super(); +export class JSONType extends DataType implements ExtensionType { + get name(): string { + return 'json'; + } + get metadata(): string { + return 'json-serialized'; } - get typeId(): Type.Utf8 { - return Type.Utf8; + get typeId(): Type.Binary { + return Type.Binary; } } diff --git a/src/types/uuid.ts b/src/types/uuid.ts index d0ee013..ef7133e 100644 --- a/src/types/uuid.ts +++ b/src/types/uuid.ts @@ -1,23 +1,20 @@ -import { DataType, Type } from '@apache-arrow/esnext-esm'; +import { Type, FixedSizeBinary } from '@apache-arrow/esnext-esm'; -export class UUIDType extends DataType { - readonly extensionName: string = 'uuid'; +import { ExtensionType } from './extensions.js'; +export class UUIDType extends FixedSizeBinary implements ExtensionType { constructor() { - super(); + super(16); } - get typeId(): Type.Utf8 { - return Type.Utf8; + get name(): string { + return 'uuid'; } - - public toString() { - return `uuid`; + get metadata(): string { + return 'uuid-serialized'; } - protected static [Symbol.toStringTag] = ((proto: UUIDType) => { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (proto).ArrayType = Uint8Array; - return (proto[Symbol.toStringTag] = 'uuid'); - })(UUIDType.prototype); + get typeId(): Type.FixedSizeBinary { + return Type.FixedSizeBinary; + } }