diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs
index 4d182be02c9e9..3422b08db7736 100644
--- a/library/alloc/tests/str.rs
+++ b/library/alloc/tests/str.rs
@@ -1,7 +1,7 @@
 use std::assert_matches::assert_matches;
 use std::borrow::Cow;
 use std::cmp::Ordering::{Equal, Greater, Less};
-use std::str::{from_utf8, from_utf8_unchecked};
+use std::str::{from_utf8, from_utf8_unchecked, Utf8Error};
 
 #[test]
 fn test_le() {
@@ -983,6 +983,250 @@ fn from_utf8_error() {
     test!(b"A\xC3\xA9 \xF1\x80\x80 ", 4, Some(3));
 }
 
+/// `const`-callable check that `e` reports exactly the given `valid_up_to` and `error_len`.
+const fn utf8_error_eq(e: &Utf8Error, valid: usize, err_len: Option<usize>) -> bool {
+    let same_len = match (e.error_len(), err_len) {
+        (Some(got), Some(want)) => got == want,
+        (got, want) => got.is_none() && want.is_none(),
+    };
+    same_len && e.valid_up_to() == valid
+}
+
+#[test]
+fn from_utf8_error_offset() {
+    const N: usize = if cfg!(miri) { 8 } else { 64 };
+
+    /// Checks `input` behind `i` ASCII bytes and `j` 3-byte chars, then with 2-byte chars appended.
+    #[track_caller]
+    fn check(input: &[u8], valid: usize, err_len: Option<usize>) {
+        let mut buf = Vec::with_capacity(input.len() + N * 6);
+        for i in 0..N {
+            for j in 0..N {
+                buf.clear();
+                buf.extend(core::iter::repeat(b'a').take(i));
+                buf.extend(core::iter::repeat(*b"\xE6\x88\x91").take(j).flat_map(|n| n));
+                let bump = buf.len();
+                buf.extend_from_slice(input);
+                assert!(
+                    utf8_error_eq(&from_utf8(&buf).unwrap_err(), bump + valid, err_len),
+                    "offset ({i}, {j}, _): on {input:?} ({buf:?})"
+                );
+                for k in 0..N {
+                    if k != 0 {
+                        buf.extend(*b"\xD0\xB6");
+                    }
+                    // Trailing bytes may change `error_len`, but never where the valid prefix ends.
+                    let error = from_utf8(&buf).unwrap_err();
+                    assert_eq!(
+                        error.valid_up_to(), bump + valid,
+                        "(suffixed) offset ({i}, {j}, {k}): on {input:?} ({buf:?})"
+                    );
+                }
+            }
+        }
+    }
+    check(b"A\xC3\xA9 \xFF ", 4, Some(1));
+    check(b"A\xC3\xA9 \x80 ", 4, Some(1));
+    check(b"A\xC3\xA9 \xC1 ", 4, Some(1));
+    check(b"A\xC3\xA9 \xC1", 4, Some(1));
+    check(b"A\xC3\xA9 \xC2", 4, None);
+    check(b"A\xC3\xA9 \xC2 ", 4, Some(1));
+    check(b"A\xC3\xA9 \xC2\xC0", 4, Some(1));
+    check(b"A\xC3\xA9 \xE0", 4, None);
+    check(b"A\xC3\xA9 \xE0\x9F", 4, Some(1));
+    check(b"A\xC3\xA9 \xE0\xA0", 4, None);
+    check(b"A\xC3\xA9 \xE0\xA0\xC0", 4, Some(2));
+    check(b"A\xC3\xA9 \xE0\xA0 ", 4, Some(2));
+    check(b"A\xC3\xA9 \xED\xA0\x80 ", 4, Some(1));
+    check(b"A\xC3\xA9 \xF1", 4, None);
+    check(b"A\xC3\xA9 \xF1\x80", 4, None);
+    check(b"A\xC3\xA9 \xF1\x80\x80", 4, None);
+    check(b"A\xC3\xA9 \xF1 ", 4, Some(1));
+    check(b"A\xC3\xA9 \xF1\x80 ", 4, Some(2));
+    check(b"A\xC3\xA9 \xF1\x80\x80 ", 4, Some(3));
+    check(b"\xc3\x28", 0, Some(1));
+    check(b"\xa0\xa1", 0, Some(1));
+    check(b"\xe2\x28\xa1", 0, Some(1));
+    check(b"\xe2\x82\x28", 0, Some(2));
+    check(b"\xf0\x28\x8c\xbc", 0, Some(1));
+    check(b"\xf0\x90\x28\xbc", 0, Some(2));
+    check(b"\xf0\x28\x8c\x28", 0, Some(1));
+    check(b"\xc0\x9f", 0, Some(1));
+    check(b"\xf5\xff\xff\xff", 0, Some(1));
+    check(b"\xed\xa0\x81", 0, Some(1));
+    check(b"\xf8\x90\x80\x80\x80", 0, Some(1));
+    check(b"123456789012345\xed", 15, None);
+    check(b"123456789012345\xf1", 15, None);
+    check(b"123456789012345\xc2", 15, None);
+    check(b"\xC2\x7F", 0, Some(1));
+    check(b"\xce", 0, None);
+    check(b"\xce\xba\xe1", 2, None);
+    check(b"\xce\xba\xe1\xbd", 2, None);
+    check(b"\xce\xba\xe1\xbd\xb9\xcf", 5, None);
+    check(b"\xce\xba\xe1\xbd\xb9\xcf\x83\xce", 7, None);
+    check(b"\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce", 9, None);
+    check(b"\xdf", 0, None);
+    check(b"\xef\xbf", 0, None);
+    check(b"\x80", 0, Some(1));
+    check(b"\x91\x85\x95\x9e", 0, Some(1));
+    check(b"\x6c\x02\x8e\x18", 2, Some(1));
+    check(b"\xFF", 0, Some(1));
+    check(b"a\xFF", 1, Some(1));
+    check(b"\xCE\xB2\xFF", 2, Some(1));
+    check(b"\xE2\x98\x83\xFF", 3, Some(1));
+    check(b"\xF0\x9D\x9D\xB1\xFF", 4, Some(1));
+    check(b"\xCE\xF0", 0, Some(1));
+    check(b"\xE2\x98\xF0", 0, Some(2));
+    check(b"\xF0\x9D\x9D\xF0", 0, Some(3));
+    check(b"\xF0\x82\x82\xAC", 0, Some(1));
+    check(b"a\xF0\x82\x82\xAC", 1, Some(1));
+    check(b"\xE2\x98\x83\xF0\x82\x82\xAC", 3, Some(1));
+    check(b"\xED\xA0\x80", 0, Some(1));
+    check(b"\xE2\x98\x83\xED\xA0\x80", 3, Some(1));
+    check(b"\xE2\x98\x83\xCE\xE2\x98\x83", 3, Some(1));
+    check(b"\xCEa", 0, Some(1));
+    check(b"a\xCEa", 1, Some(1));
+    check(b"\xE2\x98\x83\xE2\x98\xE2\x98\x83", 3, Some(2));
+    // check(b"\xF0\x9D\x9Ca", 3, Some(2));
+    check(b"\xE2\x98a", 0, Some(2));
+    check(b"a\xE2\x98a", 1, Some(2));
+    check(b"\xF0\x9D\x9Ca", 0, Some(3));
+    check(b"a\xF0\x9D\x9Ca", 1, Some(3));
+    check(b"\xF0\x9D\x9C\xB1\xF0\x9D\x9C\xE2\x98\x83", 4, Some(3));
+    check(b"foobar\xF1\x80\x80quux", 6, Some(3));
+    check(b"\xCE", 0, None);
+    check(b"a\xCE", 1, None);
+    check(b"\xE2\x98\x83\xCE", 3, None);
+    check(b"\xE2\x98", 0, None);
+    check(b"a\xE2\x98", 1, None);
+    check(b"\xE2\x98\x83\xE2\x98", 3, None);
+    check(b"\xF0\x9D\x9C", 0, None);
+    check(b"a\xF0\x9D\x9C", 1, None);
+    check(b"\xF0\x9D\x9C\xB1\xF0\x9D\x9C", 4, None);
+    check(b"\xe2\x98\x83\xce\xb2\xe3\x83\x84\xFF", 8, Some(1));
+}
+
+#[test]
+fn utf8_error_cases_const() {
+    macro_rules! expect_utf8_error {
+        ($bytes:expr, $valid:expr, $elen:expr $(,)?) => {{
+            assert!(utf8_error_eq(&from_utf8($bytes).unwrap_err(), $valid, $elen));
+            const _: () = match from_utf8($bytes) {
+                Ok(_) => panic!(concat!("shouldn't pass: ", stringify!($bytes))),
+                Err(e) => assert!(utf8_error_eq(&e, $valid, $elen)),
+            };
+        }};
+    }
+    expect_utf8_error!(b"\xc3\x28", 0, Some(1));
+    expect_utf8_error!(b"\xa0\xa1", 0, Some(1));
+    expect_utf8_error!(b"\xe2\x28\xa1", 0, Some(1));
+    expect_utf8_error!(b"\xe2\x82\x28", 0, Some(2));
+    expect_utf8_error!(b"\xf0\x28\x8c\xbc", 0, Some(1));
+    expect_utf8_error!(b"\xf0\x90\x28\xbc", 0, Some(2));
+    expect_utf8_error!(b"\xf0\x28\x8c\x28", 0, Some(1));
+    expect_utf8_error!(b"\xc0\x9f", 0, Some(1));
+    expect_utf8_error!(b"\xf5\xff\xff\xff", 0, Some(1));
+    expect_utf8_error!(b"\xed\xa0\x81", 0, Some(1));
+    expect_utf8_error!(b"\xf8\x90\x80\x80\x80", 0, Some(1));
+    expect_utf8_error!(b"123456789012345\xed", 15, None);
+    expect_utf8_error!(b"123456789012345\xf1", 15, None);
+    expect_utf8_error!(b"123456789012345\xc2", 15, None);
+    expect_utf8_error!(b"\xC2\x7F", 0, Some(1));
+    expect_utf8_error!(b"\xce", 0, None);
+    expect_utf8_error!(b"\xce\xba\xe1", 2, None);
+    expect_utf8_error!(b"\xce\xba\xe1\xbd", 2, None);
+    expect_utf8_error!(b"\xce\xba\xe1\xbd\xb9\xcf", 5, None);
+    expect_utf8_error!(b"\xce\xba\xe1\xbd\xb9\xcf\x83\xce", 7, None);
+    expect_utf8_error!(b"\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce", 9, None);
+    expect_utf8_error!(b"\xdf", 0, None);
+    expect_utf8_error!(b"\xef\xbf", 0, None);
+    expect_utf8_error!(b"\x80", 0, Some(1));
+    expect_utf8_error!(b"\x91\x85\x95\x9e", 0, Some(1));
+    expect_utf8_error!(b"\x6c\x02\x8e\x18", 2, Some(1));
+    expect_utf8_error!(
+        &[
+            0x25, 0x5b, 0x6e, 0x2c, 0x32, 0x2c, 0x5b, 0x5b, 0x33, 0x2c, 0x34, 0x2c, 0x05, 0x29,
+            0x2c, 0x33, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x5b, 0x5b,
+            0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b,
+            0x5b, 0x5d, 0x2c, 0x35, 0x2e, 0x33, 0x2c, 0x39, 0x2e, 0x33, 0x2c, 0x37, 0x2e, 0x33,
+            0x2c, 0x39, 0x2e, 0x34, 0x2c, 0x37, 0x2e, 0x33, 0x2c, 0x39, 0x2e, 0x33, 0x2c, 0x37,
+            0x2e, 0x33, 0x2c, 0x39, 0x2e, 0x34, 0x5d, 0x5d, 0x5d, 0x5d, 0x5d, 0x5d, 0x5d, 0x5d,
+            0x5d, 0x5d, 0x5d, 0x5d, 0x5d, 0x5d, 0x5d, 0x5d, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x20, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x02, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x23, 0x0a, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x7e, 0x7e, 0x0a, 0x0a, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b,
+            0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5d, 0x2c, 0x37, 0x2e, 0x33, 0x2c, 0x39, 0x2e, 0x33,
+            0x2c, 0x37, 0x2e, 0x33, 0x2c, 0x39, 0x2e, 0x34, 0x2c, 0x37, 0x2e, 0x33, 0x2c, 0x39,
+            0x2e, 0x33, 0x2c, 0x37, 0x2e, 0x33, 0x2c, 0x39, 0x2e, 0x34, 0x5d, 0x5d, 0x5d, 0x5d,
+            0x5d, 0x5d, 0x5d, 0x5d, 0x5d, 0x5d, 0x5d, 0x5d, 0x5d, 0x5d, 0x5d, 0x01, 0x01, 0x80,
+            0x01, 0x01, 0x01, 0x79, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01,
+        ],
+        335,
+        Some(1),
+    );
+    expect_utf8_error!(
+        &[
+            0x5bu8, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b,
+            0x5b, 0x80, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, b'0', 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01,
+        ],
+        15,
+        Some(1),
+    );
+    expect_utf8_error!(
+        &[
+            0x20, 0x0b, 0x01, 0x01, 0x01, 0x64, 0x3a, 0x64, 0x3a, 0x64, 0x3a, 0x5b, 0x5b, 0x5b,
+            0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b,
+            0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b,
+            0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x30, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x80, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+            0x01, 0x01, 0x01, 0x01, 0x01u8,
+        ],
+        172,
+        Some(1),
+    );
+}
+
 #[test]
 fn test_as_bytes() {
     // no null
@@ -996,6 +1240,29 @@ fn test_as_bytes() {
     assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
 }
 
+/// Every Unicode scalar value, encoded as UTF-8, must be accepted by `from_utf8` and come
+/// back as the very same string.
+#[test]
+#[cfg(not(miri))]
+fn from_utf8_all_chars() {
+    // `char::from_u32` returns `None` for values that are not valid `char`s; those are skipped.
+    for scalar in (0..=0x10FFFF).filter_map(char::from_u32) {
+        let mut scratch = [0; 4];
+        let encoded: &str = scalar.encode_utf8(&mut scratch);
+        assert_eq!(Ok(encoded), from_utf8(encoded.as_bytes()));
+    }
+}
+
+#[test]
+fn test_multi() {
+    assert!(from_utf8(b"abc").is_ok());
+    assert!(from_utf8(b"a\xE2\x98\x83a").is_ok());
+    assert!(from_utf8(b"a\xF0\x9D\x9C\xB7a").is_ok());
+    assert!(from_utf8(b"\xE2\x98\x83\xF0\x9D\x9C\xB7").is_ok());
+    assert!(from_utf8(b"a\xE2\x98\x83a\xF0\x9D\x9C\xB7a").is_ok());
+    assert!(from_utf8(b"\xEF\xBF\xBD\xE2\x98\x83\xEF\xBF\xBD").is_ok());
+}
+
 #[test]
 #[should_panic]
 fn test_as_bytes_fail() {
diff --git a/library/core/src/str/validations.rs b/library/core/src/str/validations.rs
index 2acef432f2063..292d5a12c4542 100644
--- a/library/core/src/str/validations.rs
+++ b/library/core/src/str/validations.rs
@@ -1,6 +1,6 @@
 //! Operations related to UTF-8 validation.
-
-use crate::mem;
+mod utf8_dfa;
+pub(super) use utf8_dfa::run_utf8_validation;
 
 use super::Utf8Error;
 
@@ -112,135 +112,6 @@ where
     Some(ch)
 }
 
-const NONASCII_MASK: usize = usize::repeat_u8(0x80);
-
-/// Returns `true` if any byte in the word `x` is nonascii (>= 128).
-#[inline]
-const fn contains_nonascii(x: usize) -> bool {
-    (x & NONASCII_MASK) != 0
-}
-
-/// Walks through `v` checking that it's a valid UTF-8 sequence,
-/// returning `Ok(())` in that case, or, if it is invalid, `Err(err)`.
-#[inline(always)]
-#[rustc_const_unstable(feature = "str_internals", issue = "none")]
-pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
-    let mut index = 0;
-    let len = v.len();
-
-    let usize_bytes = mem::size_of::<usize>();
-    let ascii_block_size = 2 * usize_bytes;
-    let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 };
-    let align = v.as_ptr().align_offset(usize_bytes);
-
-    while index < len {
-        let old_offset = index;
-        macro_rules! err {
-            ($error_len: expr) => {
-                return Err(Utf8Error { valid_up_to: old_offset, error_len: $error_len })
-            };
-        }
-
-        macro_rules! next {
-            () => {{
-                index += 1;
-                // we needed data, but there was none: error!
-                if index >= len {
-                    err!(None)
-                }
-                v[index]
-            }};
-        }
-
-        let first = v[index];
-        if first >= 128 {
-            let w = utf8_char_width(first);
-            // 2-byte encoding is for codepoints  \u{0080} to  \u{07ff}
-            //        first  C2 80        last DF BF
-            // 3-byte encoding is for codepoints  \u{0800} to  \u{ffff}
-            //        first  E0 A0 80     last EF BF BF
-            //   excluding surrogates codepoints  \u{d800} to  \u{dfff}
-            //               ED A0 80 to       ED BF BF
-            // 4-byte encoding is for codepoints \u{1000}0 to \u{10ff}ff
-            //        first  F0 90 80 80  last F4 8F BF BF
-            //
-            // Use the UTF-8 syntax from the RFC
-            //
-            // https://tools.ietf.org/html/rfc3629
-            // UTF8-1      = %x00-7F
-            // UTF8-2      = %xC2-DF UTF8-tail
-            // UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
-            //               %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
-            // UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
-            //               %xF4 %x80-8F 2( UTF8-tail )
-            match w {
-                2 => {
-                    if next!() as i8 >= -64 {
-                        err!(Some(1))
-                    }
-                }
-                3 => {
-                    match (first, next!()) {
-                        (0xE0, 0xA0..=0xBF)
-                        | (0xE1..=0xEC, 0x80..=0xBF)
-                        | (0xED, 0x80..=0x9F)
-                        | (0xEE..=0xEF, 0x80..=0xBF) => {}
-                        _ => err!(Some(1)),
-                    }
-                    if next!() as i8 >= -64 {
-                        err!(Some(2))
-                    }
-                }
-                4 => {
-                    match (first, next!()) {
-                        (0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {}
-                        _ => err!(Some(1)),
-                    }
-                    if next!() as i8 >= -64 {
-                        err!(Some(2))
-                    }
-                    if next!() as i8 >= -64 {
-                        err!(Some(3))
-                    }
-                }
-                _ => err!(Some(1)),
-            }
-            index += 1;
-        } else {
-            // Ascii case, try to skip forward quickly.
-            // When the pointer is aligned, read 2 words of data per iteration
-            // until we find a word containing a non-ascii byte.
-            if align != usize::MAX && align.wrapping_sub(index) % usize_bytes == 0 {
-                let ptr = v.as_ptr();
-                while index < blocks_end {
-                    // SAFETY: since `align - index` and `ascii_block_size` are
-                    // multiples of `usize_bytes`, `block = ptr.add(index)` is
-                    // always aligned with a `usize` so it's safe to dereference
-                    // both `block` and `block.add(1)`.
-                    unsafe {
-                        let block = ptr.add(index) as *const usize;
-                        // break if there is a nonascii byte
-                        let zu = contains_nonascii(*block);
-                        let zv = contains_nonascii(*block.add(1));
-                        if zu || zv {
-                            break;
-                        }
-                    }
-                    index += ascii_block_size;
-                }
-                // step from the point where the wordwise loop stopped
-                while index < len && v[index] < 128 {
-                    index += 1;
-                }
-            } else {
-                index += 1;
-            }
-        }
-    }
-
-    Ok(())
-}
-
 // https://tools.ietf.org/html/rfc3629
 const UTF8_CHAR_WIDTH: &[u8; 256] = &[
     // 1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
diff --git a/library/core/src/str/validations/utf8_dfa.rs b/library/core/src/str/validations/utf8_dfa.rs
new file mode 100644
index 0000000000000..8fa219f1eae22
--- /dev/null
+++ b/library/core/src/str/validations/utf8_dfa.rs
@@ -0,0 +1,324 @@
+//! FIXME docs/writeup/etc.
+use super::Utf8Error;
+use core::intrinsics::{const_eval_select, likely, unlikely};
+
+/// Transition table for the Shift-DFA: one packed `u32` row per possible input byte.
+// Align to a cache line boundary to reduce the cache footprint. This is just a
+// rough approximation, and doesn't really need to be perfect.
+#[cfg_attr(target_pointer_width = "64", repr(C, align(128)))]
+#[cfg_attr(target_pointer_width = "32", repr(C, align(64)))]
+struct DfaTransitions([u32; 256]);
+
+// State IDs we need to reference in the code. These and the transition table
+// were generated by a small program that drives a solver (FIXME link), which is
+// why we're able to use 32 bit rows rather than the traditional 64 bit.
+const ERR: u32 = 0;
+const END: u32 = 17;
+
+#[rustfmt::skip]
+const DFA: &DfaTransitions = {
+    const ILL: u32 = 0b00000000000000000000000000000000;
+    const X00: u32 = 0b00000000001000100000000000000000;
+    const XC2: u32 = 0b00000000000011000000000000000000;
+    const XE0: u32 = 0b00000000000110000000000000000000;
+    const XE1: u32 = 0b00000000000010000000000000000000;
+    const XED: u32 = 0b00000000001011000000000000000000;
+    const XF0: u32 = 0b00000000001100100000000000000000;
+    const XF1: u32 = 0b00000000000100000000000000000000;
+    const XF4: u32 = 0b00000000001110000000000000000000;
+    const X80: u32 = 0b01000001100000000000010001100000;
+    const X90: u32 = 0b00001001100000000000010001100000;
+    const XA0: u32 = 0b00001000000000000110010001100000;
+
+    &DfaTransitions([
+        X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00,
+        X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00,
+
+        X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00,
+        X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00,
+
+        X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00,
+        X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00,
+
+        X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00,
+        X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00, X00,
+
+        X80, X80, X80, X80, X80, X80, X80, X80, X80, X80, X80, X80, X80, X80, X80, X80,
+        X90, X90, X90, X90, X90, X90, X90, X90, X90, X90, X90, X90, X90, X90, X90, X90,
+
+        XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0,
+        XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0, XA0,
+
+        ILL, ILL, XC2, XC2, XC2, XC2, XC2, XC2, XC2, XC2, XC2, XC2, XC2, XC2, XC2, XC2,
+        XC2, XC2, XC2, XC2, XC2, XC2, XC2, XC2, XC2, XC2, XC2, XC2, XC2, XC2, XC2, XC2,
+
+        XE0, XE1, XE1, XE1, XE1, XE1, XE1, XE1, XE1, XE1, XE1, XE1, XE1, XED, XE1, XE1,
+        XF0, XF1, XF1, XF1, XF4, ILL, ILL, ILL, ILL, ILL, ILL, ILL, ILL, ILL, ILL, ILL,
+    ])
+};
+
+// Generic over `AsRef<[u8]>` because both `&[u8]` and `&[u8; CHUNK_LEN]` are passed in, and
+// for the array we would like the compiler to know about the constant length.
+#[must_use]
+#[inline]
+fn dfa_run(state: u32, chunk: impl AsRef<[u8]>) -> u32 {
+    // Intermediate values keep the whole shifted row; only the low 5 bits select the next
+    // shift, and the final state is masked once on return.
+    let row = chunk.as_ref().iter().fold(state, |prev, &byte| DFA.0[byte as usize] >> (prev & 31));
+    row & 31
+}
+
+// Single-byte DFA step returning the new masked state; for slices `dfa_run` is usually faster.
+#[must_use]
+#[inline(always)]
+const fn dfa_step(state: u32, byte: u8) -> u32 {
+    let row = DFA.0[byte as usize];
+    (row >> (state & 31)) & 31
+}
+
+// Note: not trivial to change.
+const CHUNK_LEN: usize = 16;
+
+/// Walks through `inp` checking that it's a valid UTF-8 sequence,
+/// returning `Ok(())` in that case, or, if it is invalid, `Err(err)`.
+#[inline]
+#[rustc_const_unstable(feature = "str_internals", issue = "none")]
+pub(crate) const fn run_utf8_validation(inp: &[u8]) -> Result<(), Utf8Error> {
+    const fn validate_utf8_const(inp: &[u8]) -> Result<(), Utf8Error> {
+        validate_carefully(inp, 0)
+    }
+
+    #[inline]
+    fn validate_utf8_rt(inp: &[u8]) -> Result<(), Utf8Error> {
+        match inp.len() {
+            0 => Ok(()),
+            1..=CHUNK_LEN => {
+                if is_ascii_small(inp) {
+                    Ok(())
+                } else {
+                    validate_carefully(inp, 0)
+                }
+            }
+            _ => validate_utf8_impl(inp),
+        }
+    }
+
+    // SAFETY: These are equivalent aside from optimizations.
+    unsafe { const_eval_select((inp,), validate_utf8_const, validate_utf8_rt) }
+}
+
+/// Validation code which isn't particularly optimized, but can produce an accurate
+/// `Utf8Error` (and works in `const`). Scans from `pos`, treating it as a sequence start.
+#[inline]
+const fn validate_carefully(input: &[u8], mut pos: usize) -> Result<(), Utf8Error> {
+    let mut state = END;
+    let mut valid_up_to = pos;
+    while pos < input.len() {
+        state = dfa_step(state, input[pos]);
+        match state {
+            END => valid_up_to = pos + 1,
+            ERR => {
+                debug_assert!(pos >= valid_up_to && pos - valid_up_to <= 3);
+                let error_len =
+                    Some(if pos == valid_up_to { 1 } else { (pos - valid_up_to) as u8 });
+                return Err(Utf8Error { valid_up_to, error_len });
+            }
+            // Neither `END` nor `ERR`, i.e. still inside a multi-byte sequence: keep going.
+            _ => {}
+        }
+        pos += 1;
+    }
+    if state != END { Err(Utf8Error { valid_up_to, error_len: None }) } else { Ok(()) }
+}
+
+/// Chunked validation: skips all-ASCII chunks, runs the DFA on the rest, then handles the tail.
+#[inline]
+fn validate_utf8_impl(inp: &[u8]) -> Result<(), Utf8Error> {
+    let mut state = END;
+    let mut pos = 0;
+    let (chunks, tail) = inp.as_chunks::<CHUNK_LEN>();
+    if !chunks.is_empty() {
+        let mut last_state = state;
+        let mut chunk_iter = chunks.iter();
+        while let Some(chunk) = chunk_iter.next() {
+            if state == END && all_ascii_chunk(chunk) {
+                let skipped = skip_ascii_chunks(&mut chunk_iter);
+                pos += skipped * CHUNK_LEN;
+            } else {
+                state = dfa_run(state, chunk);
+                if unlikely(state == ERR) {
+                    break;
+                }
+                last_state = state;
+            }
+            pos += core::mem::size_of_val(chunk);
+        }
+        if unlikely(state == ERR) {
+            return Err(utf8_find_error(inp, pos, last_state != END));
+        }
+    }
+    // Did we leave the optimized loop in the middle of a UTF-8 sequence?
+    let was_mid_char = state != END;
+    debug_assert!(state != ERR);
+    if !tail.is_empty() {
+        // Early return if the last CHUNK_LEN bytes of the input are all ASCII.
+        // The motivation is to avoid touching the DFA table (and so bringing it
+        // into the cache) for pure-ASCII input, without adding a branch to the
+        // `dfa_run` inner loop.
+        //
+        // The window is the final CHUNK_LEN bytes of `inp`, so besides the tail
+        // it also covers a few bytes that we've already processed. Excluding
+        // those is not required for correctness: we only get here at a character
+        // boundary (`!was_mid_char`), so an all-ASCII window means the tail is
+        // valid (and excluding them seems to hurt performance).
+        if !was_mid_char && inp.len() >= CHUNK_LEN && tail.len() < CHUNK_LEN {
+            use crate::convert::TryFrom;
+            let range = (inp.len() - CHUNK_LEN)..inp.len();
+            debug_assert!(range.contains(&pos), "{:?}", (range, pos));
+            if all_ascii_chunk(<&[u8; CHUNK_LEN]>::try_from(&inp[range]).unwrap()) {
+                return Ok(());
+            }
+        }
+
+        state = dfa_run(state, tail);
+    }
+
+    if likely(state == END) {
+        return Ok(());
+    }
+    let (index, backup) =
+        if state == ERR { (inp.len() - tail.len(), was_mid_char) } else { (inp.len(), true) };
+    Err(utf8_find_error(inp, index, backup))
+}
+
+/// Moves `pos` back to the nearest earlier byte that is not a UTF-8 continuation byte
+/// (`0b10xx_xxxx`) and returns that index. Returns 0 if `pos` is 0 or if every byte before
+/// `pos` is a continuation byte.
+#[inline]
+fn backup_not_yet_invalid(inp: &[u8], pos: usize) -> usize {
+    debug_assert!(!inp.is_empty() && inp.get(..pos).is_some());
+    // Anything that is not a continuation byte ends the backwards scan.
+    let ends_scan = |byte: &u8| (*byte & 0b1100_0000) != 0b1000_0000;
+    // `rposition` searches from the back of `inp[..pos]`, i.e. starting at `pos - 1`.
+    let found = inp[..pos].iter().rposition(ends_scan);
+    found.unwrap_or(0)
+}
+
+// Error path only: builds the `Utf8Error` by re-validating `input` from `pos` onwards. With
+// `backup` set, `pos` is first moved back with `backup_not_yet_invalid`.
+#[cold]
+fn utf8_find_error(input: &[u8], pos: usize, backup: bool) -> Utf8Error {
+    debug_assert!(!input.is_empty());
+    let start = if backup { backup_not_yet_invalid(input, pos) } else { pos };
+    validate_carefully(input, start).unwrap_err()
+}
+
+/// Advances `s` past its leading run of all-ASCII chunks and returns how many chunks were
+/// skipped. The first chunk containing a non-ASCII byte (if any) stays in the iterator, so
+/// the caller still sees it on its next call to `next()`.
+#[inline]
+fn skip_ascii_chunks(s: &mut core::slice::Iter<'_, [u8; CHUNK_LEN]>) -> usize {
+    let remaining = s.as_slice();
+    // Count on a separate iterator so that the chunk which ends the ASCII run is
+    // not consumed from `s`.
+    let skipped = remaining.iter().take_while(|chunk| all_ascii_chunk(chunk)).count();
+    // Resume from the first chunk that was not skipped.
+    *s = remaining[skipped..].iter();
+    skipped
+}
+
+/// Returns `true` if no byte in the chunk has its high bit (`0x80`) set, i.e. all are ASCII.
+#[inline]
+fn all_ascii_chunk(s: &[u8; CHUNK_LEN]) -> bool {
+    // Sadly, `core::simd` currently does not compile very efficiently on some
+    // targets (all the targets without simd, and some of the targets with it).
+    //
+    // It's also somewhat untested on others, so out of an abundance of caution
+    // we avoid it on any target that isn't both:
+    // - Known to support it efficiently.
+    // - Actually something we'd use and test on in the libcore versions we ship.
+    const SIMD_ASCII_TEST: bool = cfg!(any(
+        all(any(target_arch = "x86_64", target_arch = "x86"), target_feature = "sse2",),
+        all(target_arch = "aarch64", target_feature = "neon"),
+    ));
+
+    if SIMD_ASCII_TEST {
+        use crate::simd::*;
+        // Workaround for <https://github.com/rust-lang/portable-simd/issues/321> :(
+        let simd_chunk = Simd::<u8, CHUNK_LEN>::from_array(*s);
+        if cfg!(target_arch = "aarch64") {
+            simd_chunk.reduce_max() < 0x80
+        } else {
+            const ALL_HI: Simd<u8, CHUNK_LEN> = Simd::from_array([0x80; CHUNK_LEN]);
+            const ZERO: Simd<u8, CHUNK_LEN> = Simd::from_array([0; CHUNK_LEN]);
+            (simd_chunk & ALL_HI).simd_eq(ZERO).all()
+        }
+    } else {
+        // On targets where `core::simd` doesn't compile to efficient code we
+        // manually do the equivalent using u64-based SWAR. Using u64 and not
+        // `usize` here seems better on 32 bit targets which have 64 bit register
+        // access, but ends up just being an extra unroll step on ones which don't
+        // (so no worse, and possibly still better).
+        type SwarWord = u64;
+        const WORD_BYTES: usize = core::mem::size_of::<SwarWord>();
+        const _: () = assert!((CHUNK_LEN % WORD_BYTES) == 0 && CHUNK_LEN != 0);
+        let (arr, rem) = s.as_chunks::<WORD_BYTES>();
+        debug_assert!(rem.is_empty() && !arr.is_empty());
+        let mut combined = 0;
+        for word_bytes in arr {
+            combined |= SwarWord::from_ne_bytes(*word_bytes);
+        }
+        const ALL_HI: SwarWord = SwarWord::from_ne_bytes([0x80; WORD_BYTES]);
+        (combined & ALL_HI) == 0
+    }
+}
+
+/// Returns `true` if `s` is at most 16 bytes long and every byte is ASCII (`false` if longer).
+#[inline]
+fn is_ascii_small(s: &[u8]) -> bool {
+    // LLVM seems to get pretty aggressive if we use a loop here, even if we
+    // check the length first, which can end up having some pretty disastrous
+    // impacts on performance, seemingly due to inlining(?): the loop ends up
+    // causing performance problems, probably because of a lower willingness
+    // to inline this function.
+    //
+    // Instead of that, we handle a small number of len-ranges by doing reads
+    // that intentionally overlap for some of the slice lengths. Note that
+    // going overboard here (e.g. doing this for many sizes) will result in
+    // branch prediction issues, so this is intentionally minimal -- just
+    // enough to handle lengths up to `CHUNK_LEN` without pain.
+    match s.len() {
+        // Actually handled in caller.
+        0 => true,
+        1..=3 => {
+            // Indices `0`, `len / 2` and `len - 1` hit every byte for lengths 1..=3, and the
+            // OR of the bytes is ASCII (high bit clear) only if each of them is.
+            let all_bytes_ored = s[0] | s[s.len() / 2] | s[s.len() - 1];
+            (all_bytes_ored & 0x80) == 0
+        }
+        4..=16 => {
+            // SAFETY: the caller must ensure `off..(off + 4)` is in bounds for `n`.
+            #[inline(always)]
+            unsafe fn read32_unchecked(n: &[u8], off: usize) -> u32 {
+                debug_assert!(n.get(off..(off + core::mem::size_of::<u32>())).is_some());
+                // SAFETY: requirements passed on to caller
+                unsafe { n.as_ptr().add(off).cast::<u32>().read_unaligned() }
+            }
+            // SAFETY: All these reads are guaranteed to be in-bounds for all
+            // `s.len()` values in the 4..=16 range. Sadly, the compiler doesn't
+            // seem to be able to remove bounds checks on expressions involving
+            // `mid_round_down` (no matter how I phrase it), so we need the
+            // unsafe.
+            let all_u32_ored = unsafe {
+                let mid_round_down = (s.len() / 2) & !3;
+                let tail = s.len() - 4;
+                read32_unchecked(s, 0)
+                    | read32_unchecked(s, mid_round_down)
+                    | read32_unchecked(s, tail - mid_round_down)
+                    | read32_unchecked(s, tail)
+            };
+            (all_u32_ored & 0x80808080) == 0
+        }
+        _ => false,
+    }
+}