Skip to content

Commit e423fcf

Browse files
committed
std: Enforce Unicode in fmt::Writer
This commit is an implementation of [RFC 526][rfc] which is a change to alter the definition of the old `fmt::FormatWriter`. The new trait, renamed to `Writer`, now only exposes one method `write_str` in order to guarantee that all implementations of the formatting traits can only produce valid Unicode. [rfc]: https://github.com/rust-lang/rfcs/blob/master/text/0526-fmt-text-writer.md One of the primary improvements of this patch is the performance of the `.to_string()` method by avoiding an almost-always redundant UTF-8 check. This is a breaking change due to the renaming of the trait as well as the loss of the `write` method, but migration paths should be relatively easy: * All usage of `write` should move to `write_str`. If truly binary data was being written in an implementation of `Show`, then it will need to use a different trait or an altogether different code path. * All usage of `write!` should continue to work as-is with no modifications. * All usage of `Show` where implementations just delegate to another should continue to work as-is. [breaking-change] Closes #20352
1 parent cd61416 commit e423fcf

File tree

25 files changed

+319
-358
lines changed

25 files changed

+319
-358
lines changed

src/libcollections/string.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -995,9 +995,11 @@ pub trait ToString {
995995

996996
impl<T: fmt::Show> ToString for T {
997997
fn to_string(&self) -> String {
998-
let mut buf = Vec::<u8>::new();
999-
let _ = fmt::write(&mut buf, format_args!("{}", *self));
1000-
String::from_utf8(buf).unwrap()
998+
use core::fmt::Writer;
999+
let mut buf = String::new();
1000+
let _ = buf.write_fmt(format_args!("{}", self));
1001+
buf.shrink_to_fit();
1002+
buf
10011003
}
10021004
}
10031005

@@ -1073,6 +1075,13 @@ impl<'a> Str for CowString<'a> {
10731075
}
10741076
}
10751077

1078+
impl fmt::Writer for String {
1079+
fn write_str(&mut self, s: &str) -> fmt::Result {
1080+
self.push_str(s);
1081+
Ok(())
1082+
}
1083+
}
1084+
10761085
#[cfg(test)]
10771086
mod tests {
10781087
use prelude::*;

src/libcollections/vec.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1488,9 +1488,9 @@ impl<T:fmt::Show> fmt::Show for Vec<T> {
14881488
}
14891489
}
14901490

1491-
impl<'a> fmt::FormatWriter for Vec<u8> {
1492-
fn write(&mut self, buf: &[u8]) -> fmt::Result {
1493-
self.push_all(buf);
1491+
impl<'a> fmt::Writer for Vec<u8> {
1492+
fn write_str(&mut self, s: &str) -> fmt::Result {
1493+
self.push_all(s.as_bytes());
14941494
Ok(())
14951495
}
14961496
}

src/libcore/fmt/float.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use num::FpCategory as Fp;
2323
use ops::FnOnce;
2424
use result::Result::Ok;
2525
use slice::{mod, SliceExt};
26-
use str::StrExt;
26+
use str::{mod, StrExt};
2727

2828
/// A flag that specifies whether to use exponential (scientific) notation.
2929
pub enum ExponentFormat {
@@ -95,7 +95,7 @@ pub fn float_to_str_bytes_common<T: Float, U, F>(
9595
exp_upper: bool,
9696
f: F
9797
) -> U where
98-
F: FnOnce(&[u8]) -> U,
98+
F: FnOnce(&str) -> U,
9999
{
100100
assert!(2 <= radix && radix <= 36);
101101
match exp_format {
@@ -109,12 +109,12 @@ pub fn float_to_str_bytes_common<T: Float, U, F>(
109109
let _1: T = Float::one();
110110

111111
match num.classify() {
112-
Fp::Nan => return f("NaN".as_bytes()),
112+
Fp::Nan => return f("NaN"),
113113
Fp::Infinite if num > _0 => {
114-
return f("inf".as_bytes());
114+
return f("inf");
115115
}
116116
Fp::Infinite if num < _0 => {
117-
return f("-inf".as_bytes());
117+
return f("-inf");
118118
}
119119
_ => {}
120120
}
@@ -314,11 +314,11 @@ pub fn float_to_str_bytes_common<T: Float, U, F>(
314314
end: &'a mut uint,
315315
}
316316

317-
impl<'a> fmt::FormatWriter for Filler<'a> {
318-
fn write(&mut self, bytes: &[u8]) -> fmt::Result {
317+
impl<'a> fmt::Writer for Filler<'a> {
318+
fn write_str(&mut self, s: &str) -> fmt::Result {
319319
slice::bytes::copy_memory(self.buf.slice_from_mut(*self.end),
320-
bytes);
321-
*self.end += bytes.len();
320+
s.as_bytes());
321+
*self.end += s.len();
322322
Ok(())
323323
}
324324
}
@@ -332,5 +332,5 @@ pub fn float_to_str_bytes_common<T: Float, U, F>(
332332
}
333333
}
334334

335-
f(buf[..end])
335+
f(unsafe { str::from_utf8_unchecked(buf[..end]) })
336336
}

src/libcore/fmt/mod.rs

Lines changed: 33 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use result::Result::{Ok, Err};
2424
use result;
2525
use slice::SliceExt;
2626
use slice;
27-
use str::{StrExt, Utf8Error};
27+
use str::{mod, StrExt, Utf8Error};
2828

2929
pub use self::num::radix;
3030
pub use self::num::Radix;
@@ -57,7 +57,7 @@ pub struct Error;
5757
/// library. The `write!` macro accepts an instance of `io::Writer`, and the
5858
/// `io::Writer` trait is favored over implementing this trait.
5959
#[experimental = "waiting for core and I/O reconciliation"]
60-
pub trait FormatWriter {
60+
pub trait Writer {
6161
/// Writes a slice of bytes into this writer, returning whether the write
6262
/// succeeded.
6363
///
@@ -68,7 +68,7 @@ pub trait FormatWriter {
6868
/// # Errors
6969
///
7070
/// This function will return an instance of `FormatError` on error.
71-
fn write(&mut self, bytes: &[u8]) -> Result;
71+
fn write_str(&mut self, s: &str) -> Result;
7272

7373
/// Glue for usage of the `write!` macro with implementers of this trait.
7474
///
@@ -88,7 +88,7 @@ pub struct Formatter<'a> {
8888
width: Option<uint>,
8989
precision: Option<uint>,
9090

91-
buf: &'a mut (FormatWriter+'a),
91+
buf: &'a mut (Writer+'a),
9292
curarg: slice::Iter<'a, Argument<'a>>,
9393
args: &'a [Argument<'a>],
9494
}
@@ -258,17 +258,6 @@ pub trait UpperExp for Sized? {
258258
fn fmt(&self, &mut Formatter) -> Result;
259259
}
260260

261-
static DEFAULT_ARGUMENT: rt::Argument<'static> = rt::Argument {
262-
position: rt::ArgumentNext,
263-
format: rt::FormatSpec {
264-
fill: ' ',
265-
align: rt::AlignUnknown,
266-
flags: 0,
267-
precision: rt::CountImplied,
268-
width: rt::CountImplied,
269-
}
270-
};
271-
272261
/// The `write` function takes an output stream, a precompiled format string,
273262
/// and a list of arguments. The arguments will be formatted according to the
274263
/// specified format string into the output stream provided.
@@ -279,7 +268,7 @@ static DEFAULT_ARGUMENT: rt::Argument<'static> = rt::Argument {
279268
/// * args - the precompiled arguments generated by `format_args!`
280269
#[experimental = "libcore and I/O have yet to be reconciled, and this is an \
281270
implementation detail which should not otherwise be exported"]
282-
pub fn write(output: &mut FormatWriter, args: Arguments) -> Result {
271+
pub fn write(output: &mut Writer, args: Arguments) -> Result {
283272
let mut formatter = Formatter {
284273
flags: 0,
285274
width: None,
@@ -296,16 +285,16 @@ pub fn write(output: &mut FormatWriter, args: Arguments) -> Result {
296285
match args.fmt {
297286
None => {
298287
// We can use default formatting parameters for all arguments.
299-
for _ in range(0, args.args.len()) {
300-
try!(formatter.buf.write(pieces.next().unwrap().as_bytes()));
301-
try!(formatter.run(&DEFAULT_ARGUMENT));
288+
for (arg, piece) in args.args.iter().zip(pieces.by_ref()) {
289+
try!(formatter.buf.write_str(*piece));
290+
try!((arg.formatter)(arg.value, &mut formatter));
302291
}
303292
}
304293
Some(fmt) => {
305294
// Every spec has a corresponding argument that is preceded by
306295
// a string piece.
307296
for (arg, piece) in fmt.iter().zip(pieces.by_ref()) {
308-
try!(formatter.buf.write(piece.as_bytes()));
297+
try!(formatter.buf.write_str(*piece));
309298
try!(formatter.run(arg));
310299
}
311300
}
@@ -314,7 +303,7 @@ pub fn write(output: &mut FormatWriter, args: Arguments) -> Result {
314303
// There can be only one trailing string piece left.
315304
match pieces.next() {
316305
Some(piece) => {
317-
try!(formatter.buf.write(piece.as_bytes()));
306+
try!(formatter.buf.write_str(*piece));
318307
}
319308
None => {}
320309
}
@@ -378,7 +367,7 @@ impl<'a> Formatter<'a> {
378367
pub fn pad_integral(&mut self,
379368
is_positive: bool,
380369
prefix: &str,
381-
buf: &[u8])
370+
buf: &str)
382371
-> Result {
383372
use char::Char;
384373
use fmt::rt::{FlagAlternate, FlagSignPlus, FlagSignAwareZeroPad};
@@ -402,9 +391,10 @@ impl<'a> Formatter<'a> {
402391
for c in sign.into_iter() {
403392
let mut b = [0; 4];
404393
let n = c.encode_utf8(&mut b).unwrap_or(0);
405-
try!(f.buf.write(b[..n]));
394+
let b = unsafe { str::from_utf8_unchecked(b[0..n]) };
395+
try!(f.buf.write_str(b));
406396
}
407-
if prefixed { f.buf.write(prefix.as_bytes()) }
397+
if prefixed { f.buf.write_str(prefix) }
408398
else { Ok(()) }
409399
};
410400

@@ -413,24 +403,26 @@ impl<'a> Formatter<'a> {
413403
// If there's no minimum length requirements then we can just
414404
// write the bytes.
415405
None => {
416-
try!(write_prefix(self)); self.buf.write(buf)
406+
try!(write_prefix(self)); self.buf.write_str(buf)
417407
}
418408
// Check if we're over the minimum width, if so then we can also
419409
// just write the bytes.
420410
Some(min) if width >= min => {
421-
try!(write_prefix(self)); self.buf.write(buf)
411+
try!(write_prefix(self)); self.buf.write_str(buf)
422412
}
423413
// The sign and prefix goes before the padding if the fill character
424414
// is zero
425415
Some(min) if self.flags & (1 << (FlagSignAwareZeroPad as uint)) != 0 => {
426416
self.fill = '0';
427417
try!(write_prefix(self));
428-
self.with_padding(min - width, rt::AlignRight, |f| f.buf.write(buf))
418+
self.with_padding(min - width, rt::AlignRight, |f| {
419+
f.buf.write_str(buf)
420+
})
429421
}
430422
// Otherwise, the sign and prefix goes after the padding
431423
Some(min) => {
432424
self.with_padding(min - width, rt::AlignRight, |f| {
433-
try!(write_prefix(f)); f.buf.write(buf)
425+
try!(write_prefix(f)); f.buf.write_str(buf)
434426
})
435427
}
436428
}
@@ -451,7 +443,7 @@ impl<'a> Formatter<'a> {
451443
pub fn pad(&mut self, s: &str) -> Result {
452444
// Make sure there's a fast path up front
453445
if self.width.is_none() && self.precision.is_none() {
454-
return self.buf.write(s.as_bytes());
446+
return self.buf.write_str(s);
455447
}
456448
// The `precision` field can be interpreted as a `max-width` for the
457449
// string being formatted
@@ -463,7 +455,7 @@ impl<'a> Formatter<'a> {
463455
let char_len = s.char_len();
464456
if char_len >= max {
465457
let nchars = ::cmp::min(max, char_len);
466-
return self.buf.write(s.slice_chars(0, nchars).as_bytes());
458+
return self.buf.write_str(s.slice_chars(0, nchars));
467459
}
468460
}
469461
None => {}
@@ -472,17 +464,17 @@ impl<'a> Formatter<'a> {
472464
match self.width {
473465
// If we're under the maximum length, and there's no minimum length
474466
// requirements, then we can just emit the string
475-
None => self.buf.write(s.as_bytes()),
467+
None => self.buf.write_str(s),
476468
// If we're under the maximum width, check if we're over the minimum
477469
// width, if so it's as easy as just emitting the string.
478470
Some(width) if s.char_len() >= width => {
479-
self.buf.write(s.as_bytes())
471+
self.buf.write_str(s)
480472
}
481473
// If we're under both the maximum and the minimum width, then fill
482474
// up the minimum width with the specified string + some alignment.
483475
Some(width) => {
484476
self.with_padding(width - s.char_len(), rt::AlignLeft, |me| {
485-
me.buf.write(s.as_bytes())
477+
me.buf.write_str(s)
486478
})
487479
}
488480
}
@@ -507,15 +499,16 @@ impl<'a> Formatter<'a> {
507499

508500
let mut fill = [0u8; 4];
509501
let len = self.fill.encode_utf8(&mut fill).unwrap_or(0);
502+
let fill = unsafe { str::from_utf8_unchecked(fill[..len]) };
510503

511504
for _ in range(0, pre_pad) {
512-
try!(self.buf.write(fill[..len]));
505+
try!(self.buf.write_str(fill));
513506
}
514507

515508
try!(f(self));
516509

517510
for _ in range(0, post_pad) {
518-
try!(self.buf.write(fill[..len]));
511+
try!(self.buf.write_str(fill));
519512
}
520513

521514
Ok(())
@@ -524,8 +517,8 @@ impl<'a> Formatter<'a> {
524517
/// Writes some data to the underlying buffer contained within this
525518
/// formatter.
526519
#[unstable = "reconciling core and I/O may alter this definition"]
527-
pub fn write(&mut self, data: &[u8]) -> Result {
528-
self.buf.write(data)
520+
pub fn write_str(&mut self, data: &str) -> Result {
521+
self.buf.write_str(data)
529522
}
530523

531524
/// Writes some formatted information into this instance
@@ -616,7 +609,9 @@ impl Show for char {
616609
impl<T> Pointer for *const T {
617610
fn fmt(&self, f: &mut Formatter) -> Result {
618611
f.flags |= 1 << (rt::FlagAlternate as uint);
619-
LowerHex::fmt(&(*self as uint), f)
612+
let ret = LowerHex::fmt(&(*self as uint), f);
613+
f.flags &= !(1 << (rt::FlagAlternate as uint));
614+
ret
620615
}
621616
}
622617

src/libcore/fmt/num.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use fmt;
1818
use iter::DoubleEndedIteratorExt;
1919
use num::{Int, cast};
2020
use slice::SliceExt;
21+
use str;
2122

2223
/// A type that represents a specific radix
2324
#[doc(hidden)]
@@ -60,7 +61,8 @@ trait GenericRadix {
6061
if x == zero { break }; // No more digits left to accumulate.
6162
}
6263
}
63-
f.pad_integral(is_positive, self.prefix(), buf[curr..])
64+
let buf = unsafe { str::from_utf8_unchecked(buf[curr..]) };
65+
f.pad_integral(is_positive, self.prefix(), buf)
6466
}
6567
}
6668

src/librustc_driver/driver.rs

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ use rustc_trans::save;
2828
use rustc_trans::trans;
2929
use rustc_typeck as typeck;
3030

31-
use serialize::{json, Encodable};
31+
use serialize::json;
3232

3333
use std::io;
3434
use std::io::fs;
@@ -143,10 +143,7 @@ pub fn phase_1_parse_input(sess: &Session, cfg: ast::CrateConfig, input: &Input)
143143
});
144144

145145
if sess.opts.debugging_opts & config::AST_JSON_NOEXPAND != 0 {
146-
let mut stdout = io::BufferedWriter::new(io::stdout());
147-
let mut json = json::PrettyEncoder::new(&mut stdout);
148-
// unwrapping so IoError isn't ignored
149-
krate.encode(&mut json).unwrap();
146+
println!("{}", json::as_json(&krate));
150147
}
151148

152149
if sess.show_span() {
@@ -338,10 +335,7 @@ pub fn assign_node_ids_and_map<'ast>(sess: &Session,
338335
ast_map::map_crate(forest, NodeIdAssigner { sess: sess }));
339336

340337
if sess.opts.debugging_opts & config::AST_JSON != 0 {
341-
let mut stdout = io::BufferedWriter::new(io::stdout());
342-
let mut json = json::PrettyEncoder::new(&mut stdout);
343-
// unwrapping so IoError isn't ignored
344-
map.krate().encode(&mut json).unwrap();
338+
println!("{}", json::as_json(map.krate()));
345339
}
346340

347341
map

0 commit comments

Comments
 (0)