4
4
5
5
import com .google .protobuf .ByteString ;
6
6
import io .cloudquery .schema .Column ;
7
+ import io .cloudquery .schema .Resource ;
7
8
import io .cloudquery .schema .Table ;
8
9
import io .cloudquery .schema .Table .TableBuilder ;
10
+ import io .cloudquery .types .JSONType .JSONVector ;
11
+ import io .cloudquery .types .UUIDType .UUIDVector ;
9
12
import java .io .ByteArrayOutputStream ;
10
13
import java .io .IOException ;
11
14
import java .nio .channels .Channels ;
15
18
import java .util .Map ;
16
19
import org .apache .arrow .memory .BufferAllocator ;
17
20
import org .apache .arrow .memory .RootAllocator ;
21
+ import org .apache .arrow .vector .BigIntVector ;
22
+ import org .apache .arrow .vector .BitVector ;
23
+ import org .apache .arrow .vector .FieldVector ;
24
+ import org .apache .arrow .vector .FixedSizeBinaryVector ;
25
+ import org .apache .arrow .vector .Float4Vector ;
26
+ import org .apache .arrow .vector .Float8Vector ;
27
+ import org .apache .arrow .vector .IntVector ;
28
+ import org .apache .arrow .vector .LargeVarBinaryVector ;
29
+ import org .apache .arrow .vector .LargeVarCharVector ;
30
+ import org .apache .arrow .vector .SmallIntVector ;
31
+ import org .apache .arrow .vector .TimeStampVector ;
32
+ import org .apache .arrow .vector .TinyIntVector ;
33
+ import org .apache .arrow .vector .UInt1Vector ;
34
+ import org .apache .arrow .vector .UInt2Vector ;
35
+ import org .apache .arrow .vector .UInt4Vector ;
36
+ import org .apache .arrow .vector .UInt8Vector ;
37
+ import org .apache .arrow .vector .VarBinaryVector ;
38
+ import org .apache .arrow .vector .VarCharVector ;
18
39
import org .apache .arrow .vector .VectorSchemaRoot ;
19
40
import org .apache .arrow .vector .ipc .ArrowReader ;
20
41
import org .apache .arrow .vector .ipc .ArrowStreamReader ;
21
42
import org .apache .arrow .vector .ipc .ArrowStreamWriter ;
22
43
import org .apache .arrow .vector .types .pojo .Field ;
44
+ import org .apache .arrow .vector .types .pojo .FieldType ;
23
45
import org .apache .arrow .vector .types .pojo .Schema ;
46
+ import org .apache .arrow .vector .util .Text ;
24
47
25
48
public class ArrowHelper {
49
+ public static final String CQ_EXTENSION_INCREMENTAL = "cq:extension:incremental" ;
50
+ public static final String CQ_EXTENSION_CONSTRAINT_NAME = "cq:extension:constraint_name" ;
51
+ public static final String CQ_EXTENSION_PRIMARY_KEY = "cq:extension:primary_key" ;
52
+ public static final String CQ_EXTENSION_UNIQUE = "cq:extension:unique" ;
26
53
public static final String CQ_TABLE_NAME = "cq:table_name" ;
27
54
public static final String CQ_TABLE_TITLE = "cq:table_title" ;
28
55
public static final String CQ_TABLE_DESCRIPTION = "cq:table_description" ;
29
56
public static final String CQ_TABLE_DEPENDS_ON = "cq:table_depends_on" ;
30
57
58
+ private static void setVectorData (FieldVector vector , Object data ) {
59
+ vector .allocateNew ();
60
+ vector .setValueCount (1 );
61
+ if (vector instanceof BigIntVector ) {
62
+ ((BigIntVector ) vector ).set (0 , (long ) data );
63
+ return ;
64
+ }
65
+ if (vector instanceof BitVector ) {
66
+ ((BitVector ) vector ).set (0 , (int ) data );
67
+ return ;
68
+ }
69
+ if (vector instanceof FixedSizeBinaryVector ) {
70
+ ((FixedSizeBinaryVector ) vector ).set (0 , (byte []) data );
71
+ return ;
72
+ }
73
+ if (vector instanceof Float4Vector ) {
74
+ ((Float4Vector ) vector ).set (0 , (float ) data );
75
+ return ;
76
+ }
77
+ if (vector instanceof Float8Vector ) {
78
+ ((Float8Vector ) vector ).set (0 , (double ) data );
79
+ return ;
80
+ }
81
+ if (vector instanceof IntVector ) {
82
+ ((IntVector ) vector ).set (0 , (int ) data );
83
+ return ;
84
+ }
85
+ if (vector instanceof LargeVarBinaryVector ) {
86
+ ((LargeVarBinaryVector ) vector ).set (0 , (byte []) data );
87
+ return ;
88
+ }
89
+ if (vector instanceof LargeVarCharVector ) {
90
+ ((LargeVarCharVector ) vector ).set (0 , (Text ) data );
91
+ return ;
92
+ }
93
+ if (vector instanceof SmallIntVector ) {
94
+ ((SmallIntVector ) vector ).set (0 , (short ) data );
95
+ return ;
96
+ }
97
+ if (vector instanceof TimeStampVector ) {
98
+ ((TimeStampVector ) vector ).set (0 , (long ) data );
99
+ return ;
100
+ }
101
+ if (vector instanceof TinyIntVector ) {
102
+ ((TinyIntVector ) vector ).set (0 , (byte ) data );
103
+ return ;
104
+ }
105
+ if (vector instanceof UInt1Vector ) {
106
+ ((UInt1Vector ) vector ).set (0 , (byte ) data );
107
+ return ;
108
+ }
109
+ if (vector instanceof UInt2Vector ) {
110
+ ((UInt2Vector ) vector ).set (0 , (short ) data );
111
+ return ;
112
+ }
113
+ if (vector instanceof UInt4Vector ) {
114
+ ((UInt4Vector ) vector ).set (0 , (int ) data );
115
+ return ;
116
+ }
117
+ if (vector instanceof UInt8Vector ) {
118
+ ((UInt8Vector ) vector ).set (0 , (long ) data );
119
+ return ;
120
+ }
121
+ if (vector instanceof VarBinaryVector ) {
122
+ ((VarBinaryVector ) vector ).set (0 , (byte []) data );
123
+ return ;
124
+ }
125
+ if (vector instanceof VarCharVector ) {
126
+ ((VarCharVector ) vector ).set (0 , (Text ) data );
127
+ return ;
128
+ }
129
+ if (vector instanceof UUIDVector ) {
130
+ ((UUIDVector ) vector ).set (0 , (java .util .UUID ) data );
131
+ return ;
132
+ }
133
+ if (vector instanceof JSONVector ) {
134
+ ((JSONVector ) vector ).setSafe (0 , (byte []) data );
135
+ return ;
136
+ }
137
+
138
+ throw new IllegalArgumentException ("Unsupported vector type: " + vector .getClass ());
139
+ }
140
+
31
141
public static ByteString encode (Table table ) throws IOException {
32
142
try (BufferAllocator bufferAllocator = new RootAllocator ()) {
33
143
Schema schema = toArrowSchema (table );
34
- VectorSchemaRoot schemaRoot = VectorSchemaRoot .create (schema , bufferAllocator );
35
- try (ByteArrayOutputStream out = new ByteArrayOutputStream ()) {
36
- try (ArrowStreamWriter writer =
37
- new ArrowStreamWriter (schemaRoot , null , Channels .newChannel (out ))) {
38
- writer .start ();
39
- writer .end ();
40
- return ByteString .copyFrom (out .toByteArray ());
144
+ try (VectorSchemaRoot schemaRoot = VectorSchemaRoot .create (schema , bufferAllocator )) {
145
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream ()) {
146
+ try (ArrowStreamWriter writer =
147
+ new ArrowStreamWriter (schemaRoot , null , Channels .newChannel (out ))) {
148
+ writer .start ();
149
+ writer .end ();
150
+ return ByteString .copyFrom (out .toByteArray ());
151
+ }
41
152
}
42
153
}
43
154
}
@@ -57,7 +168,18 @@ public static Schema toArrowSchema(Table table) {
57
168
Field [] fields = new Field [columns .size ()];
58
169
for (int i = 0 ; i < columns .size (); i ++) {
59
170
Column column = columns .get (i );
60
- Field field = Field .nullable (column .getName (), column .getType ());
171
+ Map <String , String > metadata = new HashMap <>();
172
+ metadata .put (CQ_EXTENSION_UNIQUE , column .isUnique () ? "true" : "false" );
173
+ metadata .put (CQ_EXTENSION_PRIMARY_KEY , column .isPrimaryKey () ? "true" : "false" );
174
+ if (column .getConstraintName () != null ) {
175
+ metadata .put (CQ_EXTENSION_CONSTRAINT_NAME , column .getConstraintName ());
176
+ }
177
+ metadata .put (CQ_EXTENSION_INCREMENTAL , column .isIncrementalKey () ? "true" : "false" );
178
+ Field field =
179
+ new Field (
180
+ column .getName (),
181
+ new FieldType (!column .isNotNull (), column .getType (), null , metadata ),
182
+ null );
61
183
fields [i ] = field ;
62
184
}
63
185
Map <String , String > metadata = new HashMap <>();
@@ -77,7 +199,20 @@ public static Schema toArrowSchema(Table table) {
77
199
public static Table fromArrowSchema (Schema schema ) {
78
200
List <Column > columns = new ArrayList <>();
79
201
for (Field field : schema .getFields ()) {
80
- columns .add (Column .builder ().name (field .getName ()).type (field .getType ()).build ());
202
+ boolean isUnique = field .getMetadata ().get (CQ_EXTENSION_UNIQUE ) == "true" ;
203
+ boolean isPrimaryKey = field .getMetadata ().get (CQ_EXTENSION_PRIMARY_KEY ) == "true" ;
204
+ String constraintName = field .getMetadata ().get (CQ_EXTENSION_CONSTRAINT_NAME );
205
+ boolean isIncrementalKey = field .getMetadata ().get (CQ_EXTENSION_INCREMENTAL ) == "true" ;
206
+
207
+ columns .add (
208
+ Column .builder ()
209
+ .name (field .getName ())
210
+ .unique (isUnique )
211
+ .primaryKey (isPrimaryKey )
212
+ .incrementalKey (isIncrementalKey )
213
+ .constraintName (constraintName )
214
+ .type (field .getType ())
215
+ .build ());
81
216
}
82
217
83
218
Map <String , String > metaData = schema .getCustomMetadata ();
@@ -99,4 +234,25 @@ public static Table fromArrowSchema(Schema schema) {
99
234
100
235
return tableBuilder .build ();
101
236
}
237
+
238
+ public static ByteString encode (Resource resource ) throws IOException {
239
+ try (BufferAllocator bufferAllocator = new RootAllocator ()) {
240
+ Table table = resource .getTable ();
241
+ Schema schema = toArrowSchema (table );
242
+ try (VectorSchemaRoot schemaRoot = VectorSchemaRoot .create (schema , bufferAllocator )) {
243
+ for (int i = 0 ; i < table .getColumns ().size (); i ++) {
244
+ setVectorData (schemaRoot .getVector (i ), resource .getData ().get (i ).get ());
245
+ }
246
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream ()) {
247
+ try (ArrowStreamWriter writer =
248
+ new ArrowStreamWriter (schemaRoot , null , Channels .newChannel (out ))) {
249
+ writer .start ();
250
+ writer .writeBatch ();
251
+ writer .end ();
252
+ return ByteString .copyFrom (out .toByteArray ());
253
+ }
254
+ }
255
+ }
256
+ }
257
+ }
102
258
}
0 commit comments