Skip to content

Commit 2fef79b

Browse files
gszadovszky authored and wgtmac committed
GH-3198: Allow specifying trusted classes by class name (#3199)
1 parent aedfbbc commit 2fef79b

10 files changed

+335
-170
lines changed

parquet-avro/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ Apache Avro integration
3232
| `parquet.avro.read.schema` | `String` | The Avro schema to be used for reading. It shall be compatible with the file schema. The file schema will be used directly if not set. |
3333
| `parquet.avro.projection` | `String` | The Avro schema to be used for projection. |
3434
| `parquet.avro.compatible` | `boolean` | Flag for compatibility mode. `true` for materializing Avro `IndexedRecord` objects, `false` for materializing the related objects for either generic, specific, or reflect records.<br/>The default value is `true`. |
35-
| `parquet.avro.readInt96AsFixed` | `boolean` | Flag for handling the `INT96` Parquet types. `true` for converting it to the `fixed` Avro type, `false` for not handling `INT96` types (throwing exception).<br/>The default value is `false`.<br/>**NOTE: The `INT96` Parquet type is deprecated. This option is only to support old data.** |
35+
| `parquet.avro.readInt96AsFixed` | `boolean` | Flag for handling the `INT96` Parquet types. `true` for converting it to the `fixed` Avro type, `false` for not handling `INT96` types (throwing exception).<br/>The default value is `false`.<br/>**NOTE: The `INT96` Parquet type is deprecated. This option is only to support old data.** |
36+
| `parquet.avro.serializable.classes` | `String` | Comma-separated list of the fully qualified names of the classes that may be referenced from the Avro schema by "java-class" or "java-key-class" and are allowed to be loaded. |
3637

3738
### Configuration for writing
3839

parquet-avro/src/main/java/org/apache/parquet/avro/AvroConverters.java

Lines changed: 10 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@
2121
import java.lang.reflect.Constructor;
2222
import java.lang.reflect.InvocationTargetException;
2323
import java.nio.ByteBuffer;
24-
import java.util.Arrays;
25-
import java.util.List;
2624
import org.apache.avro.Schema;
2725
import org.apache.avro.generic.GenericData;
2826
import org.apache.avro.util.Utf8;
@@ -36,13 +34,19 @@
3634

3735
public class AvroConverters {
3836

37+
/**
38+
* Contains the packages whose classes are allowed to be loaded when they are referenced from the Avro schema by
39+
* "java-class" or "java-key-class". It contains the packages parsed from the system property
40+
* "org.apache.parquet.avro.SERIALIZABLE_PACKAGES".
41+
*
42+
* @deprecated will be removed in 2.0.0
43+
*/
44+
@Deprecated
3945
public static final String[] SERIALIZABLE_PACKAGES;
4046

4147
static {
42-
SERIALIZABLE_PACKAGES = System.getProperty(
43-
"org.apache.parquet.avro.SERIALIZABLE_PACKAGES",
44-
"java.lang,java.math,java.io,java.net,org.apache.parquet.avro")
45-
.split(",");
48+
String prop = System.getProperty("org.apache.parquet.avro.SERIALIZABLE_PACKAGES");
49+
SERIALIZABLE_PACKAGES = prop == null ? new String[0] : prop.split(",");
4650
}
4751

4852
public abstract static class AvroGroupConverter extends GroupConverter {
@@ -272,7 +276,6 @@ static final class FieldStringableConverter extends BinaryConverter<Object> {
272276

273277
public FieldStringableConverter(ParentValueContainer parent, Class<?> stringableClass) {
274278
super(parent);
275-
checkSecurity(stringableClass);
276279
stringableName = stringableClass.getName();
277280
try {
278281
this.ctor = stringableClass.getConstructor(String.class);
@@ -289,33 +292,6 @@ public Object convert(Binary binary) {
289292
throw new ParquetDecodingException("Cannot convert binary to " + stringableName, e);
290293
}
291294
}
292-
293-
private void checkSecurity(Class<?> clazz) throws SecurityException {
294-
List<String> trustedPackages = Arrays.asList(SERIALIZABLE_PACKAGES);
295-
296-
boolean trustAllPackages = trustedPackages.size() == 1 && "*".equals(trustedPackages.get(0));
297-
if (trustAllPackages || clazz.isPrimitive()) {
298-
return;
299-
}
300-
301-
boolean found = false;
302-
Package thePackage = clazz.getPackage();
303-
if (thePackage != null) {
304-
for (String trustedPackage : trustedPackages) {
305-
if (thePackage.getName().equals(trustedPackage)
306-
|| thePackage.getName().startsWith(trustedPackage + ".")) {
307-
found = true;
308-
break;
309-
}
310-
}
311-
if (!found) {
312-
throw new SecurityException("Forbidden " + clazz
313-
+ "! This class is not trusted to be included in Avro schema using java-class."
314-
+ " Please set org.apache.parquet.avro.SERIALIZABLE_PACKAGES system property"
315-
+ " with the packages you trust.");
316-
}
317-
}
318-
}
319295
}
320296

321297
static final class FieldEnumConverter extends BinaryConverter<Object> {

parquet-avro/src/main/java/org/apache/parquet/avro/AvroParquetReader.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
package org.apache.parquet.avro;
2020

2121
import java.io.IOException;
22+
import java.util.stream.Collectors;
23+
import java.util.stream.Stream;
2224
import org.apache.avro.generic.GenericData;
2325
import org.apache.avro.generic.GenericRecord;
2426
import org.apache.avro.specific.SpecificData;
@@ -179,6 +181,26 @@ public Builder<T> withDataModel(GenericData model) {
179181
return this;
180182
}
181183

184+
public Builder<T> withSerializableClasses(String... classNames) {
185+
if (classNames.length == 0) {
186+
configuration.set(AvroReadSupport.SERIALIZABLE_CLASSES, null);
187+
} else {
188+
configuration.set(AvroReadSupport.SERIALIZABLE_CLASSES, String.join(",", classNames));
189+
}
190+
return this;
191+
}
192+
193+
public Builder<T> withSerializableClasses(Class<?>... classes) {
194+
if (classes.length == 0) {
195+
configuration.set(AvroReadSupport.SERIALIZABLE_CLASSES, null);
196+
} else {
197+
configuration.set(
198+
AvroReadSupport.SERIALIZABLE_CLASSES,
199+
Stream.of(classes).map(Class::getName).collect(Collectors.joining(",")));
200+
}
201+
return this;
202+
}
203+
182204
public Builder<T> disableCompatibility() {
183205
this.enableCompatibility = false;
184206
return this;

parquet-avro/src/main/java/org/apache/parquet/avro/AvroReadSupport.java

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020

2121
import java.util.LinkedHashMap;
2222
import java.util.Map;
23+
import java.util.stream.Collectors;
24+
import java.util.stream.Stream;
2325
import org.apache.avro.Schema;
2426
import org.apache.avro.generic.GenericData;
2527
import org.apache.hadoop.conf.Configuration;
@@ -61,6 +63,12 @@ public class AvroReadSupport<T> extends ReadSupport<T> {
6163
public static final String READ_INT96_AS_FIXED = "parquet.avro.readInt96AsFixed";
6264
public static final boolean READ_INT96_AS_FIXED_DEFAULT = false;
6365

66+
/**
67+
* List of the fully qualified class names separated by ',' that may be referenced from the Avro schema by
68+
* "java-class" or "java-key-class" and are allowed to be loaded.
69+
*/
70+
public static final String SERIALIZABLE_CLASSES = "parquet.avro.serializable.classes";
71+
6472
/**
6573
* @param configuration a configuration
6674
* @param requestedProjection the requested projection schema
@@ -83,6 +91,24 @@ public static void setAvroDataSupplier(Configuration configuration, Class<? exte
8391
configuration.set(AVRO_DATA_SUPPLIER, clazz.getName());
8492
}
8593

94+
public static void setSerializableClasses(Configuration configuration, String... classNames) {
95+
if (classNames.length == 0) {
96+
configuration.set(AvroReadSupport.SERIALIZABLE_CLASSES, null);
97+
} else {
98+
configuration.set(AvroReadSupport.SERIALIZABLE_CLASSES, String.join(",", classNames));
99+
}
100+
}
101+
102+
public static void setSerializableClasses(Configuration configuration, Class<?>... classes) {
103+
if (classes.length == 0) {
104+
configuration.set(AvroReadSupport.SERIALIZABLE_CLASSES, null);
105+
} else {
106+
configuration.set(
107+
AvroReadSupport.SERIALIZABLE_CLASSES,
108+
Stream.of(classes).map(Class::getName).collect(Collectors.joining(",")));
109+
}
110+
}
111+
86112
private GenericData model = null;
87113

88114
public AvroReadSupport() {}
@@ -158,7 +184,15 @@ public RecordMaterializer<T> prepareForRead(
158184
if (Boolean.parseBoolean(compatEnabled)) {
159185
return newCompatMaterializer(parquetSchema, avroSchema, model);
160186
}
161-
return new AvroRecordMaterializer<T>(parquetSchema, avroSchema, model);
187+
String[] serializableClasses = configuration.getStrings(SERIALIZABLE_CLASSES, null);
188+
189+
return new AvroRecordMaterializer<T>(
190+
parquetSchema,
191+
avroSchema,
192+
model,
193+
serializableClasses == null
194+
? new ReflectClassValidator.PackageValidator()
195+
: new ReflectClassValidator.ClassValidator(serializableClasses));
162196
}
163197

164198
@SuppressWarnings("unchecked")

0 commit comments

Comments
 (0)