Skip to content

Commit f94d102

Browse files
committed
Fix UTF-8 BOM detection for non-ASCII platforms (e.g. EBCDIC)
1 parent 367a64e commit f94d102

File tree

1 file changed

+23
-3
lines changed

1 file changed

+23
-3
lines changed

lib/Module/Metadata.pm

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,26 @@ sub _parse_version_expression {
493493
return ( $sigil, $variable_name, $package );
494494
}
495495

496+
my $UTF8_BOM;
497+
my $UTF8_BOM_BYTES_1_2;
498+
my $UTF8_BOM_BYTES_TRAILER;
499+
my $UTF8_BOM_BYTES_TRAILER_LENGTH;
500+
BEGIN {
501+
if ( "$]" >= 5.008 ) {
502+
503+
# Encoding U+FEFF yields the platform's native BOM bytes, so this works on EBCDIC too
504+
$UTF8_BOM = "\x{FEFF}";
505+
utf8::encode($UTF8_BOM);
506+
}
507+
else {
508+
$UTF8_BOM = "\x{EF}\x{BB}\x{BF}";
509+
}
510+
511+
$UTF8_BOM_BYTES_1_2 = substr($UTF8_BOM, 0, 2);
512+
$UTF8_BOM_BYTES_TRAILER = substr($UTF8_BOM, 2);
513+
$UTF8_BOM_BYTES_TRAILER_LENGTH = length $UTF8_BOM_BYTES_TRAILER;
514+
}
515+
496516
# Look for a UTF-8/UTF-16BE/UTF-16LE BOM at the beginning of the stream.
497517
# If there's one, then skip it and set the :encoding layer appropriately.
498518
sub _handle_bom {
@@ -512,10 +532,10 @@ sub _handle_bom {
512532
elsif ( $buf eq "\x{FF}\x{FE}" ) {
513533
$encoding = 'UTF-16LE';
514534
}
515-
elsif ( $buf eq "\x{EF}\x{BB}" ) {
516-
$buf = ' ';
535+
elsif ( $buf eq $UTF8_BOM_BYTES_1_2 ) {
536+
$buf = ' ' x $UTF8_BOM_BYTES_TRAILER_LENGTH;
517537
$count = read $fh, $buf, length $buf;
518-
if ( defined $count and $count >= 1 and $buf eq "\x{BF}" ) {
538+
if ( defined $count and $count >= 1 and $buf eq $UTF8_BOM_BYTES_TRAILER ) {
519539
$encoding = 'UTF-8';
520540
}
521541
}

0 commit comments

Comments
 (0)