Skip to content

Commit e6ff972

Browse files
authored
Fold MaybeInvalidModule into Config (#1411)
* Fold `MaybeInvalidModule` into `Config` This commit removes the top-level `MaybeInvalidModule` type and instead folds it into a configuration option inside of `Config`. This has the benefit of being able to still configure all the other `Config` options during generating possibly-invalid modules at the cost that we can no longer statically rule out success of `ensure_termination`. This error is now propagated upwards and documented in a few places. Closes #1410 * Make anyhow non-optional for wasm-smith * Update fuzzers
1 parent f43b418 commit e6ff972

File tree

10 files changed

+123
-114
lines changed

10 files changed

+123
-114
lines changed

crates/wasm-smith/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ harness = false
1919
workspace = true
2020

2121
[dependencies]
22-
anyhow = { workspace = true, optional = true }
22+
anyhow = { workspace = true }
2323
arbitrary = { workspace = true, features = ["derive"] }
2424
clap = { workspace = true, optional = true }
2525
flagset = "0.4"
@@ -42,5 +42,5 @@ wat = { path = "../wat" }
4242
libfuzzer-sys = { workspace = true }
4343

4444
[features]
45-
_internal_cli = ["anyhow", "clap", "flagset/serde", "serde", "serde_derive", "wasmparser", "wat"]
45+
_internal_cli = ["clap", "flagset/serde", "serde", "serde_derive", "wasmparser", "wat"]
4646
wasmparser = ['dep:wasmparser', 'wasm-encoder/wasmparser']

crates/wasm-smith/src/config.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,18 @@ define_config! {
481481
///
482482
/// Defaults to `false`.
483483
pub threads_enabled: bool = false,
484+
485+
/// Indicates whether wasm-smith is allowed to generate invalid function
486+
/// bodies.
487+
///
488+
/// When enabled this option will enable taking raw bytes from the input
489+
/// byte stream and using them as a wasm function body. This means that
490+
/// the output module is not guaranteed to be valid but can help tickle
491+
/// various parts of validation/compilation in some circumstances as
492+
/// well.
493+
///
494+
/// Defaults to `false`.
495+
pub allow_invalid_funcs: bool = false,
484496
}
485497
}
486498

@@ -617,6 +629,7 @@ impl<'a> Arbitrary<'a> for Config {
617629
tail_call_enabled: false,
618630
gc_enabled: false,
619631
generate_custom_sections: false,
632+
allow_invalid_funcs: false,
620633
})
621634
}
622635
}

crates/wasm-smith/src/core.rs

Lines changed: 6 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ impl Module {
172172
duplicate_imports_behavior: DuplicateImportsBehavior,
173173
) -> Result<Self> {
174174
let mut module = Module::empty(config, duplicate_imports_behavior);
175-
module.build(u, false)?;
175+
module.build(u)?;
176176
Ok(module)
177177
}
178178

@@ -213,30 +213,6 @@ impl Module {
213213
}
214214
}
215215

216-
/// Same as [`Module`], but may be invalid.
217-
///
218-
/// This module generates function bodies differnetly than `Module` to try to
219-
/// better explore wasm decoders and such.
220-
#[derive(Debug)]
221-
pub struct MaybeInvalidModule {
222-
module: Module,
223-
}
224-
225-
impl MaybeInvalidModule {
226-
/// Encode this Wasm module into bytes.
227-
pub fn to_bytes(&self) -> Vec<u8> {
228-
self.module.to_bytes()
229-
}
230-
}
231-
232-
impl<'a> Arbitrary<'a> for MaybeInvalidModule {
233-
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
234-
let mut module = Module::empty(Config::default(), DuplicateImportsBehavior::Allowed);
235-
module.build(u, true)?;
236-
Ok(MaybeInvalidModule { module })
237-
}
238-
}
239-
240216
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
241217
pub(crate) struct RecGroup {
242218
pub(crate) types: Vec<SubType>,
@@ -398,7 +374,7 @@ pub(crate) enum GlobalInitExpr {
398374
}
399375

400376
impl Module {
401-
fn build(&mut self, u: &mut Unstructured, allow_invalid: bool) -> Result<()> {
377+
fn build(&mut self, u: &mut Unstructured) -> Result<()> {
402378
self.valtypes = configured_valtypes(&self.config);
403379

404380
// We attempt to figure out our available imports *before* creating the types section here,
@@ -425,7 +401,7 @@ impl Module {
425401
self.arbitrary_start(u)?;
426402
self.arbitrary_elems(u)?;
427403
self.arbitrary_data(u)?;
428-
self.arbitrary_code(u, allow_invalid)?;
404+
self.arbitrary_code(u)?;
429405
Ok(())
430406
}
431407

@@ -1795,11 +1771,11 @@ impl Module {
17951771
)
17961772
}
17971773

1798-
fn arbitrary_code(&mut self, u: &mut Unstructured, allow_invalid: bool) -> Result<()> {
1774+
fn arbitrary_code(&mut self, u: &mut Unstructured) -> Result<()> {
17991775
self.code.reserve(self.num_defined_funcs);
18001776
let mut allocs = CodeBuilderAllocations::new(self);
18011777
for (_, ty) in self.funcs[self.funcs.len() - self.num_defined_funcs..].iter() {
1802-
let body = self.arbitrary_func_body(u, ty, &mut allocs, allow_invalid)?;
1778+
let body = self.arbitrary_func_body(u, ty, &mut allocs)?;
18031779
self.code.push(body);
18041780
}
18051781
allocs.finish(u, self)?;
@@ -1811,11 +1787,10 @@ impl Module {
18111787
u: &mut Unstructured,
18121788
ty: &FuncType,
18131789
allocs: &mut CodeBuilderAllocations,
1814-
allow_invalid: bool,
18151790
) -> Result<Code> {
18161791
let mut locals = self.arbitrary_locals(u)?;
18171792
let builder = allocs.builder(ty, &mut locals);
1818-
let instructions = if allow_invalid && u.arbitrary().unwrap_or(false) {
1793+
let instructions = if self.config.allow_invalid_funcs && u.arbitrary().unwrap_or(false) {
18191794
Instructions::Arbitrary(arbitrary_vec_u8(u)?)
18201795
} else {
18211796
Instructions::Generated(builder.arbitrary(u, self)?)

crates/wasm-smith/src/core/terminate.rs

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use super::*;
2+
use anyhow::{bail, Result};
23
use std::mem;
34
use wasm_encoder::BlockType;
45

@@ -12,7 +13,14 @@ impl Module {
1213
///
1314
/// The index of the fuel global is returned, so that you may control how
1415
/// much fuel the module is given.
15-
pub fn ensure_termination(&mut self, default_fuel: u32) -> u32 {
16+
///
17+
/// # Errors
18+
///
19+
/// Returns an error if any function body was generated with
20+
/// possibly-invalid bytes rather than being generated by wasm-smith. In
21+
/// such a situation this pass does not parse the input bytes and inject
22+
/// instructions, instead it returns an error.
23+
pub fn ensure_termination(&mut self, default_fuel: u32) -> Result<u32> {
1624
let fuel_global = self.globals.len() as u32;
1725
self.globals.push(GlobalType {
1826
val_type: ValType::I32,
@@ -41,10 +49,12 @@ impl Module {
4149

4250
let instrs = match &mut code.instructions {
4351
Instructions::Generated(list) => list,
44-
// only present on modules contained within
45-
// `MaybeInvalidModule`, which doesn't expose its internal
46-
// `Module`.
47-
Instructions::Arbitrary(_) => unreachable!(),
52+
Instructions::Arbitrary(_) => {
53+
bail!(
54+
"failed to ensure that a function generated due to it \
55+
containing arbitrary instructions"
56+
)
57+
}
4858
};
4959
let mut new_insts = Vec::with_capacity(instrs.len() * 2);
5060

@@ -65,6 +75,6 @@ impl Module {
6575
*instrs = new_insts;
6676
}
6777

68-
fuel_global
78+
Ok(fuel_global)
6979
}
7080
}

crates/wasm-smith/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ mod component;
5858
mod config;
5959
mod core;
6060

61-
pub use crate::core::{InstructionKind, InstructionKinds, MaybeInvalidModule, Module};
61+
pub use crate::core::{InstructionKind, InstructionKinds, Module};
6262
use arbitrary::{Result, Unstructured};
6363
pub use component::Component;
6464
pub use config::{Config, MemoryOffsetChoices};

crates/wasm-smith/tests/core.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ fn smoke_test_ensure_termination() {
2727
rng.fill_bytes(&mut buf);
2828
let u = Unstructured::new(&buf);
2929
if let Ok(mut module) = Module::arbitrary_take_rest(u) {
30-
module.ensure_termination(10);
30+
module.ensure_termination(10).unwrap();
3131
let wasm_bytes = module.to_bytes();
3232

3333
let mut validator = Validator::new_with_features(wasm_features());

fuzz/src/lib.rs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ pub fn generate_valid_module(
4242
// still produce a valid module.
4343
if u.ratio(1, 10)? {
4444
log::debug!("ensuring termination with 100 fuel");
45-
module.ensure_termination(100);
45+
let _ = module.ensure_termination(100);
4646
}
4747

4848
log_wasm(&bytes, &config);
@@ -76,6 +76,27 @@ pub fn generate_valid_component(
7676
Ok((bytes, config))
7777
}
7878

79+
pub fn validator_for_config(config: &Config) -> wasmparser::Validator {
80+
wasmparser::Validator::new_with_features(wasmparser::WasmFeatures {
81+
multi_value: config.multi_value_enabled,
82+
multi_memory: config.max_memories > 1,
83+
bulk_memory: config.bulk_memory_enabled,
84+
reference_types: config.reference_types_enabled,
85+
simd: config.simd_enabled,
86+
relaxed_simd: config.relaxed_simd_enabled,
87+
memory64: config.memory64_enabled,
88+
threads: config.threads_enabled,
89+
exceptions: config.exceptions_enabled,
90+
// TODO: determine our larger story for function-references in
91+
// wasm-tools and whether we should just have a Wasm GC flag since
92+
// function-references is effectively part of the Wasm GC proposal at
93+
// this point.
94+
function_references: config.gc_enabled,
95+
gc: config.gc_enabled,
96+
..wasmparser::WasmFeatures::default()
97+
})
98+
}
99+
79100
// Optionally log the module and its configuration if we've gotten this
80101
// far. Note that we don't do this unconditionally to avoid slowing down
81102
// fuzzing, but this is expected to be enabled when debugging a failing

fuzz/src/validate.rs

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,33 @@
1-
use arbitrary::{Arbitrary, Result, Unstructured};
2-
use wasm_smith::MaybeInvalidModule;
1+
use arbitrary::{Result, Unstructured};
32
use wasmparser::{Validator, WasmFeatures};
43

54
pub fn run(u: &mut Unstructured<'_>) -> Result<()> {
5+
// Either use `wasm-smith` to generate a module with possibly invalid
6+
// functions or try validating raw bytes from the input itself.
7+
if u.arbitrary()? {
8+
validate_maybe_invalid_module(u)?;
9+
} else {
10+
validate_raw_bytes(u)?;
11+
}
12+
Ok(())
13+
}
14+
15+
pub fn validate_maybe_invalid_module(u: &mut Unstructured<'_>) -> Result<()> {
16+
// Generate a "valid" module but specifically allow invalid functions which
17+
// means that some functions may be defined from the input bytes raw. This
18+
// means that most of the module is valid but only some functions may be
19+
// invalid which can help stress various bits and pieces of validation.
20+
let (wasm, config) = crate::generate_valid_module(u, |config, _| {
21+
config.allow_invalid_funcs = true;
22+
Ok(())
23+
})?;
24+
let mut validator = crate::validator_for_config(&config);
25+
drop(validator.validate_all(&wasm));
26+
Ok(())
27+
}
28+
29+
pub fn validate_raw_bytes(u: &mut Unstructured<'_>) -> Result<()> {
30+
// Enable arbitrary combinations of features to validate the input bytes.
631
let mut validator = Validator::new_with_features(WasmFeatures {
732
reference_types: u.arbitrary()?,
833
multi_value: u.arbitrary()?,
@@ -26,18 +51,8 @@ pub fn run(u: &mut Unstructured<'_>) -> Result<()> {
2651
component_model_values: u.arbitrary()?,
2752
component_model_nested_names: u.arbitrary()?,
2853
});
29-
let use_maybe_invalid = u.arbitrary()?;
30-
31-
if use_maybe_invalid {
32-
if let Ok(module) = MaybeInvalidModule::arbitrary(u) {
33-
let wasm = module.to_bytes();
34-
crate::log_wasm(&wasm, "");
35-
drop(validator.validate_all(&wasm));
36-
}
37-
} else {
38-
let wasm = u.bytes(u.len())?;
39-
crate::log_wasm(wasm, "");
40-
drop(validator.validate_all(wasm));
41-
}
54+
let wasm = u.bytes(u.len())?;
55+
crate::log_wasm(wasm, "");
56+
drop(validator.validate_all(wasm));
4257
Ok(())
4358
}

fuzz/src/validate_valid_module.rs

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -25,25 +25,7 @@ pub fn run(u: &mut Unstructured<'_>) -> Result<()> {
2525
};
2626

2727
// Validate the module or component and assert that it passes validation.
28-
let mut validator = wasmparser::Validator::new_with_features(wasmparser::WasmFeatures {
29-
component_model: generate_component,
30-
multi_value: config.multi_value_enabled,
31-
multi_memory: config.max_memories > 1,
32-
bulk_memory: config.bulk_memory_enabled,
33-
reference_types: config.reference_types_enabled,
34-
simd: config.simd_enabled,
35-
relaxed_simd: config.relaxed_simd_enabled,
36-
memory64: config.memory64_enabled,
37-
threads: config.threads_enabled,
38-
exceptions: config.exceptions_enabled,
39-
// TODO: determine our larger story for function-references in
40-
// wasm-tools and whether we should just have a Wasm GC flag since
41-
// function-references is effectively part of the Wasm GC proposal at
42-
// this point.
43-
function_references: config.gc_enabled,
44-
gc: config.gc_enabled,
45-
..wasmparser::WasmFeatures::default()
46-
});
28+
let mut validator = crate::validator_for_config(&config);
4729
if let Err(e) = validator.validate_all(&wasm_bytes) {
4830
let component_or_module = if generate_component {
4931
"component"

0 commit comments

Comments
 (0)