Porting/makerel: parse options with Getopt::Long; let -e take a code page

Replaces Getopt::Std with Getopt::Long, gives -e an optional code page
argument (1047 when omitted) and, while translating to EBCDIC, suppresses
the diagnostics for files already listed as binary, UTF-16, or in an
alien encoding.

Review adjustments folded into the added lines (hunk sizes unchanged):
  * Getopt::Long is configured with "bundling" so that the documented
    -e1047 form, and clustered switches such as -xn, are accepted.
  * Stray non-option arguments are rejected again (the check was dropped
    together with getopts()).
  * A code page that matches no supported table now dies instead of
    handing undef to get_a2n().
  * The code page is quoted with \Q...\E before being used as a pattern.
  * The comment on the pure-ASCII branch now agrees with its
    [\000-\177] pattern (128 code points, not 160).

diff --git a/Porting/makerel b/Porting/makerel
index ce553a952e95..a0ef3b6c1de2 100755
--- a/Porting/makerel
+++ b/Porting/makerel
@@ -26,7 +26,7 @@ use warnings;
 
 use ExtUtils::Manifest qw(manicheck);
 $ExtUtils::Manifest::Quiet = 1;
-use Getopt::Std;
+use Getopt::Long qw(:config bundling);
 use Digest::SHA;
 
 $|=1;
@@ -40,19 +40,33 @@ usage: $0 [ -r rootdir ] [-s suffix ] [ -x ] [ -n ]
                  in patchlevel.h (or blank, if none)
     -x           make a .xz file in addition to a .gz file
     -n           do not make any tarballs, just the directory
-    -c cleanup   perform a cleanup before building: clean git repo and target
+    -c           perform a cleanup before building: clean git repo and target
                  directory/tarballs
-    -e           Make the outputs be translated into EBCDIC.  (They can then
+    -e[1047|037] Make the outputs be translated into EBCDIC.  (They can then
                  be downloaded directly to an EBCDIC platform without needing
-                 any further translation.)
+                 any further translation.)  1047 is assumed if the argument to
+                 -e is omitted.
 EOF
 
-my %opts;
-getopts('exncr:s:', \%opts) or usage;
-
-@ARGV && usage;
-
-my $relroot = defined $opts{r} ? $opts{r} : "..";
+my $relroot = "..";
+my $suffix;
+my $additional_xz = 0;
+my $no_tarballs = 0;
+my $cleanup_first = 0;
+my $ebcdic_code_page = undef;
+
+GetOptions (
+    "r=s" => \$relroot,
+    "s:s" => \$suffix,
+    "x"   => \$additional_xz,
+    "n"   => \$no_tarballs,
+    "c"   => \$cleanup_first,
+    "e:s" => \$ebcdic_code_page,
+    )
+and ! @ARGV or die usage;
+
+$ebcdic_code_page = "1047" if defined $ebcdic_code_page
+                           && ! $ebcdic_code_page;
 
 die "Must be in root of the perl source tree.\n"
     unless -f "./MANIFEST" and -f "patchlevel.h";
@@ -79,12 +93,12 @@ $lpatch_tags = join "-", @lpatch_tags;
 
 my $perl = "perl-$vers";
 my $reldir = "$perl";
-$lpatch_tags = $opts{s} if defined $opts{s};
+$lpatch_tags = $suffix if defined $suffix;
 $reldir .= "-$lpatch_tags" if $lpatch_tags;
 
 print "\nMaking a release for $perl in $relroot/$reldir\n\n";
 
-cleanup($relroot, $reldir) if $opts{c};
+cleanup($relroot, $reldir) if $cleanup_first;
 
 print "Cross-checking the MANIFEST...\n";
 my @missfile = manicheck();
@@ -100,7 +114,7 @@ print "\n";
 print "Creating $relroot/$reldir release directory...\n";
 die "$relroot/$reldir release directory already exists [consider using -c]\n" if -e "$relroot/$reldir";
 die "$relroot/$reldir.tar.gz release file already exists [consider using -c]\n" if -e "$relroot/$reldir.tar.gz";
-die "$relroot/$reldir.tar.xz release file already exists [consider using -c]\n" if $opts{x} && -e "$relroot/$reldir.tar.xz";
+die "$relroot/$reldir.tar.xz release file already exists [consider using -c]\n" if $additional_xz && -e "$relroot/$reldir.tar.xz";
 mkdir("$relroot/$reldir", 0755) or die "mkdir $relroot/$reldir: $!\n";
 print "\n";
 
@@ -119,21 +133,100 @@ my @exe = map { my ($f) = split; glob($f) }
           map { split "\n" }
           do { local (@ARGV, $/) = 'Porting/exec-bit.txt'; <> };
 
-if ($opts{e}) {
+if ($ebcdic_code_page) {
+    die "$0 must be run on an ASCII platform" if ord("A") != 65;
+
+    my @known_binary_files = qw(
+        cpan/Archive-Tar/t/src/linktest/linktest_missing_dir.tar
+        cpan/Archive-Tar/t/src/linktest/linktest_with_dir.tar
+        cpan/Archive-Tar/t/src/long/bar.tar
+        cpan/Archive-Tar/t/src/long/foo.tbz
+        cpan/Archive-Tar/t/src/long/foo.tgz
+        cpan/Archive-Tar/t/src/long/prefix-directory-concat.tar
+        cpan/Archive-Tar/t/src/short/bar.tar
+        cpan/Archive-Tar/t/src/short/foo.tbz
+        cpan/Archive-Tar/t/src/short/foo.tgz
+        cpan/Encode/t/big5-eten.enc
+        cpan/Encode/t/big5-hkscs.enc
+        cpan/Encode/t/enc_module.enc
+        cpan/Encode/t/gb2312.enc
+        cpan/Encode/t/jisx0201.enc
+        cpan/Encode/t/jisx0208.enc
+        cpan/Encode/t/jisx0212.enc
+        cpan/Encode/t/ksc5601.enc
+        cpan/ExtUtils-MakeMaker/t/testdata/reallylongdirectoryname/arch1/Config.pm
+        cpan/ExtUtils-MakeMaker/t/testdata/reallylongdirectoryname/arch2/Config.pm
+        cpan/IO-Compress/t/files/bad-efs.zip
+        cpan/IO-Compress/t/files/encrypt-aes.zip
+        cpan/IO-Compress/t/files/encrypt-standard.zip
+        cpan/IO-Compress/t/files/jar.zip
+        cpan/IO-Compress/t/files/test.ods
+        cpan/IO-Compress/t/files/testfile1.odt
+        cpan/IO-Compress/t/files/time-invalid.zip
+        cpan/IO-Compress/t/files/time-zero.zip
+        cpan/IPC-Cmd/t/src/x.tgz
+        cpan/Pod-Checker/t/pod/empty.xr
+        os2/diff.configure
+        win32/perlexe.ico
+    );
+
+    my %known_binary_files;
+    $known_binary_files{$_} = 1 for @known_binary_files;
+
+    my @known_alien_encoding_files = qw(
+        cpan/Pod-Simple/t/corpus/2202jp.txt
+        cpan/Pod-Simple/t/corpus/2202jpx.txt
+        cpan/Pod-Simple/t/corpus/2202jpy.txt
+        cpan/Pod-Simple/t/corpus/2202jpz.txt
+        cpan/Pod-Simple/t/corpus/8859_7.pod
+        cpan/Pod-Simple/t/corpus/cp1256.txt
+        cpan/Pod-Simple/t/corpus/fet_cont.txt
+        cpan/Pod-Simple/t/corpus/fet_dup.txt
+        cpan/Pod-Simple/t/corpus/iso6.txt
+        cpan/Pod-Simple/t/corpus/koi8r.txt
+        cpan/Pod-Simple/t/corpus/laozi38.txt
+        cpan/Pod-Simple/t/corpus/laozi38b.txt
+        cpan/Pod-Simple/t/corpus/laozi38p.pod
+        cpan/Pod-Simple/t/corpus/nonesuch.txt
+        cpan/Pod-Simple/t/corpus/pasternak_cp1251.txt
+        cpan/Pod-Simple/t/corpus/s2763_sjis.txt
+        cpan/Pod-Simple/t/corpus/thai_iso11.txt
+        cpan/Pod-Simple/t/corpus2/fiqhakbar_iso6.txt
+        cpan/Pod-Simple/t/enc-chars.t
+        cpan/Pod-Simple/t/encod02.t
+        cpan/Pod-Simple/t/encod03.t
+    );
+
+    my %known_alien_encoding_files;
+    $known_alien_encoding_files{$_} = 1 for @known_alien_encoding_files;
+
+    my @known_utf16_files = qw(
+        cpan/CPAN-Meta-YAML/t/data/utf_16_le_bom.yml
+        cpan/Module-Metadata/corpus/BOMTest/UTF16BE.pm
+        cpan/Module-Metadata/corpus/BOMTest/UTF16LE.pm
+        cpan/Pod-Simple/t/corpus2/polish_utf16be_bom.txt
+        cpan/Pod-Simple/t/corpus2/polish_utf16le_bom.txt
+    );
+
+    my %known_utf16_files;
+    $known_utf16_files{$_} = 1 for @known_utf16_files;
+
     require './regen/charset_translations.pl';
 
-    # Translation tables, so far only to 1047
-    my @charset = grep { /1047/ } get_supported_code_pages();
-    my $charset = $charset[0];
+    # Translation tables
+    my @charset = grep { /\Q$ebcdic_code_page\E/ } get_supported_code_pages();
+    my $charset = $charset[0] // die "Unknown code page '$ebcdic_code_page'\n";
 
     my $a2e = get_a2n($charset);
 
-    die "$0 must be run on an ASCII platform" if ord("A") != 65;
+    my $LATIN1_TO_EBCDIC = "";
+    $LATIN1_TO_EBCDIC .= quotemeta chr $a2e->[$_] for 0..255;
 
     print "Translating to EBCDIC...\n";
 
     open my $mani_fh, "<", "MANIFEST" or die "Can't read copied MANIFEST: $!";
-    my @manifest = <$mani_fh>; # Slurp in whole thing before the file gets trashed
+    my @manifest = <$mani_fh>; # Slurp in whole thing before the file gets
+                               # trashed
     close $mani_fh or die "Couldn't close MANIFEST: $!";
     while (defined ($_ = shift @manifest)) {
         chomp;
@@ -151,12 +244,14 @@ if ($opts{e}) {
             if ($potential_BOM eq "\xFE\xFF") {
                 $utf16_high = 0;
                 $utf16_low = 1;
-                print STDERR "$file is UTF-16BE\n";
+                print STDERR "$file is UTF-16BE\n"
+                                        unless $known_utf16_files{$file};
             }
             elsif ($potential_BOM eq "\xFF\xFE") {
                 $utf16_high = 1;
                 $utf16_low = 0;
-                print STDERR "$file is UTF-16LE\n";
+                print STDERR "$file is UTF-16LE\n"
+                                        unless $known_utf16_files{$file};
             }
 
             if ($utf16_high || $utf16_low) {
@@ -180,23 +275,38 @@ if ($opts{e}) {
                     $xlated .= $cur;
                 }
             }
-            elsif (-B $file) { # Binary files aren't translated
-                print STDERR "$file is binary\n";
-                close $fh or die "Couldn't close $file: $!";
-                next;
-            }
             else {
-                if (   ! utf8::decode($text)
-                    || $text =~ / ^ [[:ascii:][:cntrl:]]* $ /x)
-                {
-                    # Here, either $text isn't legal UTF-8; or it is, but it
-                    # consists entirely of one of the 160 ASCII and control
-                    # characters whose EBCDIC representation is the same whether
-                    # UTF-EBCDIC or not.  This means we just translate
-                    # byte-by-byte from Latin1 to EBCDIC.
-                    $xlated = ($text =~ s/(.)/chr $a2e->[ord $1]/rsge);
+                my $has_alien_encoding = 0;
+                if ($text =~ / ^ =encoding \s+ (\S*) \s* $ /mx) {
+                    my $encoding = $1;
+                    $has_alien_encoding =
+                        $encoding !~ / ^ (?: utf -? 8
+                                           | cp -? 1252
+                                           | ascii
+                                           | (?: iso -? )? 8859 -? 1 \b
+                                         )
+                                         \b
+                                     /ix;
+
                 }
-                else {
+
+                if ($has_alien_encoding) {
+                    print STDERR "$file has alien encoding\n"
+                                    unless $known_alien_encoding_files{$file};
+                    close $fh or die "Couldn't close $file: $!";
+                    next;
+                }
+                elsif ($text =~ / ^ [\000-\177]* $ /x) {
+
+                    # Here, $text consists entirely of ASCII characters (code
+                    # points 0-127), whose EBCDIC representation is the same
+                    # whether UTF-EBCDIC or not.  Just translate directly
+                    no warnings 'misc'; # Harmless here to have replacement
+                                        # string longer
+                    $xlated = eval "\$text =~ tr/\000-\177/$LATIN1_TO_EBCDIC/r";
+                    die $@ if $@;
+                }
+                elsif (utf8::decode($text)) {
 
                     # Here, $text is legal UTF-8, and the representation of some
                     # character(s) in it it matters if is encoded in UTF-EBCDIC or
@@ -205,6 +315,19 @@ if ($opts{e}) {
                     # UTF-EBCDIC.
                     $xlated = ($text =~ s/(.)/cp_2_utfbytes(ord $1, $charset)/rsge);
                 }
+                elsif (! -T $file) { # Binary files aren't translated (don't use
+                                     # -B, because that accepts empty files)
+                    print STDERR "$file is binary\n"
+                                            unless $known_binary_files{$file};
+                    close $fh or die "Couldn't close $file: $!";
+                    next;
+                }
+                else {
+
+                    # Here, $text isn't legal UTF-8; assume it is Latin1
+                    $xlated = eval "\$text =~ tr/\000-\377/$LATIN1_TO_EBCDIC/r";
+                    die $@ if $@;
+                }
             }
 
             # Overwrite the file with the translation
@@ -269,7 +392,7 @@ warn $out if $out;
 
 chdir ".." or die $!;
 
-exit if $opts{n};
+exit if $no_tarballs;
 
 my $src = (-e $perl) ? $perl : 'perl'; # 'perl' in maint branch
 
@@ -277,7 +400,8 @@ my $tar_cmd = "tar cf - --format=ustar $reldir";
 
 my $output_7z;
 my $have_7z;
-if (! $opts{e}) {
+if (! $ebcdic_code_page) {
+
     print "Checking if you have 7z...\n";
     $output_7z = `7z 2>&1`;
     $have_7z = defined $output_7z && $output_7z =~ /7-Zip/;
@@ -302,7 +426,7 @@ if ($have_7z) {
     }
 }
 
-if ($opts{x}) {
+if ($additional_xz) {
     print "Creating and compressing the tar.xz file with xz...\n";
     $cmd = "$tar_cmd | xz -z -c > $reldir.tar.xz";
     system($cmd) == 0 or die "$cmd failed";