package Archive::SevenZip; use strict; use Carp qw(croak); use Encode qw( decode encode ); use File::Basename qw(dirname basename); use Archive::SevenZip::Entry; use File::Temp qw(tempfile tempdir); use File::Copy; use IPC::Open3 'open3'; use Path::Class; use Exporter 'import'; # for the error codes, in Archive::Zip API compatibility =head1 NAME Archive::SevenZip - Read/write 7z , zip , ISO9960 and other archives =head1 SYNOPSIS my $ar = Archive::SevenZip->new( find => 1, archivename => $archivename, verbose => $verbose, ); for my $entry ( $ar->list ) { my $target = join "/", "$target_dir", $entry->basename; $ar->extractMember( $entry->fileName, $target ); }; =head1 METHODS =cut use vars qw(%sevenzip_charsetname %class_defaults $VERSION @EXPORT_OK %EXPORT_TAGS); $VERSION= '0.06'; # Archive::Zip API # Error codes use constant AZ_OK => 0; use constant COMPRESSION_STORED => 'Store'; # file is stored (no compression) use constant COMPRESSION_DEFLATED => 'Deflate'; # file is Deflated @EXPORT_OK = (qw(AZ_OK COMPRESSION_STORED COMPRESSION_DEFLATED)); %EXPORT_TAGS = ( ERROR_CODES => [ qw( AZ_OK ) #AZ_STREAM_END #AZ_ERROR #AZ_FORMAT_ERROR #AZ_IO_ERROR ], CONSTANTS => [ qw(COMPRESSION_STORED COMPRESSION_DEFLATED) ], ); %sevenzip_charsetname = ( 'UTF-8' => 'UTF-8', 'Latin-1' => 'WIN', 'ISO-8859-1' => 'WIN', '' => 'DOS', # dunno what the appropriate name would be ); if( $^O !~ /MSWin/ ) { # Wipe all filesystem encodings because my Debian 7z 9.20 doesn't understand them $sevenzip_charsetname{ $_ } = '' for keys %sevenzip_charsetname; }; %class_defaults = ( '7zip' => '7z', fs_encoding => 'UTF-8', default_options => [ "-y", "-bd" ], type => 'zip', ); =head2 C<< Archive::SevenZip->find_7z_executable >> Finds the 7z executable in the path or in C<< $ENV{ProgramFiles} >> or C<< $ENV{ProgramFiles(x86)} >>. This is called when a C<< Archive::SevenZip >> instance is created with the C parameter set to 1. =cut sub find_7z_executable { my($class) = @_; my $old_default = $class_defaults{ '7zip' }; my $envsep = $^O =~ /MSWin/ ? ';' : ':'; my @search = split /$envsep/, $ENV{PATH}; if( $^O =~ /MSWin/i ) { push @search, map { "$_\\7-Zip" } grep {defined} ($ENV{'ProgramFiles'}, $ENV{'ProgramFiles(x86)'}); }; my $found = $class->version; while( ! defined $found and @search) { my $dir = shift @search; if ($^O eq 'MSWin32') { next unless -e file("$dir", "7z.exe" ); } $class_defaults{'7zip'} = "" . file("$dir", "7z" ); $found = $class->version; }; if( ! $found) { $class_defaults{ '7zip' } = $old_default; }; return defined $found ? $found : () } =head2 C<< Archive::SevenZip->new >> my $ar = Archive::SevenZip->new( $archivename ); my $ar = Archive::SevenZip->new( archivename => $archivename, find => 1, ); Creates a new class instance. C - will try to find the executable using C<< ->find_7z_executable >> =cut sub new { my( $class, %options); if( @_ == 2 ) { ($class, $options{ archivename }) = @_; } else { ($class, %options) = @_; }; if( $options{ find }) { $class->find_7z_executable(); }; for( keys %class_defaults ) { $options{ $_ } = $class_defaults{ $_ } unless defined $options{ $_ }; }; bless \%options => $class } sub version { my( $self_or_class, %options) = @_; for( keys %class_defaults ) { $options{ $_ } = $class_defaults{ $_ } unless defined $options{ $_ }; }; my $self = ref $self_or_class ? $self_or_class : $self_or_class->new( %options ); my $cmd = $self->get_command( command => '', archivename => undef, options => [], # on Debian, 7z doesn't like any options... fs_encoding => undef, # on Debian, 7z doesn't like any options... default_options => [], # on Debian, 7z doesn't like any options... ); my $fh = eval { $self->run($cmd, binmode => ':raw') }; if( ! $@ ) { local $/ = "\n"; my @output = <$fh>; if( @output >= 3) { $output[1] =~ /^7-Zip\s+.*?(\d+\.\d+)\s+(?:\s*:\s*)?Copyright/ or return undef; return $1; } else { return undef } } } =head2 C<< $ar->open >> my @entries = $ar->open; for my $entry (@entries) { print $entry->name, "\n"; }; Lists the entries in the archive. A fresh archive which does not exist on disk yet has no entries. The returned entries are L instances. This method will one day move to the Path::Class-compatibility API. =cut # Iterate over the entries in the archive # Path::Class API sub open { my( $self )= @_; my @contents = $self->list(); } =head2 C<< $ar->memberNamed >> my $entry = $ar->memberNamed('hello_world.txt'); print $entry->fileName, "\n"; The path separator must be a forward slash ("/") This method will one day move to the Archive::Zip-compatibility API. =cut # Archive::Zip API sub memberNamed { my( $self, $name, %options )= @_; my( $entry ) = grep { $_->fileName eq $name } $self->members( %options ); $entry } # Archive::Zip API sub list { my( $self, %options )= @_; if( ! grep { defined $_ } $options{archivename}, $self->{archivename}) { # We are an archive that does not exist on disk yet return }; my $cmd = $self->get_command( command => "l", options => ["-slt"], %options ); my $fh = $self->run($cmd, encoding => $options{ fs_encoding } ); my @output = <$fh>; my %results = ( header => [], archive => [], ); # Get/skip header while( @output and $output[0] !~ /^--\s*$/ ) { my $line = shift @output; $line =~ s!\s+$!!; push @{ $results{ header }}, $line; }; # Get/skip archive information while( @output and $output[0] !~ /^----------\s*$/ ) { my $line = shift @output; $line =~ s!\s+$!!; push @{ $results{ archive }}, $line; }; if( $output[0] =~ /^----------\s*$/ ) { shift @output; } else { warn "Unexpected line in 7zip output, hope that's OK: [$output[0]]"; }; my @members; # Split entries my %entry_info; while( @output ) { my $line = shift @output; if( $line =~ /^([\w ]+) =(?: (.*?)|)\s*$/ ) { $entry_info{ $1 } = $2; } elsif($line =~ /^\s*$/) { push @members, Archive::SevenZip::Entry->new( %entry_info, _Container => $self, ); %entry_info = (); } else { croak "Unknown file entry [$line]"; }; }; return @members } { no warnings 'once'; *members = \&list; } =head2 C<< $ar->openMemberFH >> my $fh = $ar->openMemberFH('test.txt'); while( <$fh> ) { print "test.txt: $_"; }; Reads the uncompressed content of the member from the archive. This method will one day move to the Archive::Zip-compatibility API. =cut sub openMemberFH { my( $self, %options ); if( @_ == 2 ) { ($self,$options{ membername })= @_; } else { ($self,%options) = @_; }; defined $options{ membername } or croak "Need member name to extract"; my $cmd = $self->get_command( command => "e", options => ["-so"], members => [$options{membername}] ); my $fh = $self->run($cmd, encoding => $options{ encoding }, binmode => $options{ binmode }); return $fh } sub content { my( $self, %options ) = @_; my $fh = $self->openMemberFH( %options ); local $/; <$fh> } =head2 C<< $ar->extractMember >> $ar->extractMember('test.txt' => 'extracted_test.txt'); Extracts the uncompressed content of the member from the archive. This method will one day move to the Archive::Zip-compatibility API. =cut # Archive::Zip API sub extractMember { my( $self, $memberOrName, $extractedName, %_options ) = @_; $extractedName = $memberOrName unless defined $extractedName; my %options = (%$self, %_options); my $target_dir = dirname $extractedName; my $target_name = basename $extractedName; my $cmd = $self->get_command( command => "e", archivename => $options{ archivename }, members => [ $memberOrName ], options => [ "-o$target_dir" ], ); my $fh = $self->run($cmd, encoding => $options{ encoding }); while( <$fh>) { warn $_ if $options{ verbose }; }; if( basename $memberOrName ne $target_name ) { rename "$target_dir/" . basename($memberOrName) => $extractedName or croak "Couldn't move '$memberOrName' to '$extractedName': $!"; }; return AZ_OK; }; =head2 C<< $ar->removeMember >> $ar->removeMember('test.txt'); Removes the member from the archive. =cut # strikingly similar to Archive::Zip API sub removeMember { my( $self, $name, %_options ) = @_; my %options = (%$self, %_options); if( $^O =~ /MSWin/ ) { $name =~ s!/!\\!g; } my $cmd = $self->get_command( command => "d", archivename => $options{ archivename }, members => [ $name ], ); my $fh = $self->run($cmd, encoding => $options{ encoding } ); $self->wait($fh, %options); return AZ_OK; }; sub add_quotes { map { defined $_ && /\s/ ? qq{"$_"} : $_ } @_ }; sub get_command { my( $self, %options )= @_; $options{ members } ||= []; $options{ archivename } = $self->{ archivename } unless defined $options{ archivename }; if( ! exists $options{ fs_encoding }) { $options{ fs_encoding } = defined $self->{ fs_encoding } ? $self->{ fs_encoding } : $class_defaults{ fs_encoding }; }; if( ! defined $options{ default_options }) { $options{ default_options } = defined $self->{ default_options } ? $self->{ default_options } : $class_defaults{ default_options }; }; my @charset; if( defined $options{ fs_encoding }) { exists $sevenzip_charsetname{ $options{ fs_encoding }} or croak "Unknown filesystem encoding '$options{ fs_encoding }'"; if( my $charset = $sevenzip_charsetname{ $options{ fs_encoding }}) { push @charset, "-scs" . $sevenzip_charsetname{ $options{ fs_encoding }}; }; }; for(@{ $options{ members }}) { $_ = encode $options{ fs_encoding }, $_; }; # Now quote what needs to be quoted for( @{ $options{ options }}, @{ $options{ members }}, $options{ archivename }, "$self->{ '7zip' }") { }; return [grep {defined $_} add_quotes($self->{ '7zip' }), @{ $options{ default_options }}, $options{ command }, @charset, add_quotes( @{ $options{ options }} ), add_quotes( $options{ archivename } ), add_quotes( @{ $options{ members }} ), ]; } sub run { my( $self, $cmd, %options )= @_; my $mode = '-|'; if( defined $options{ stdin }) { $mode = '|-'; }; my $fh; warn "Opening [@$cmd]" if $options{ verbose }; if( $self->{verbose} ) { CORE::open( $fh, $mode, @$cmd) or croak "Couldn't launch [$mode @$cmd]: $!/$?"; } else { CORE::open( my $fh_err, '>', File::Spec->devnull ) or warn "Couldn't redirect child STDERR"; my $errh = fileno $fh_err; # We accumulate zombie PIDs here, ah well. my $pid = open3( my $fh_in, my $fh_out, '>&' . $errh, @$cmd) or croak "Couldn't launch [$mode @$cmd]: $!/$?"; if( $mode eq '|-' ) { $fh = $fh_in; } else { $fh = $fh_out }; } if( $options{ encoding }) { binmode $fh, ":encoding($options{ encoding })"; } elsif( $options{ binmode } ) { binmode $fh, $options{ binmode }; }; if( $options{ stdin }) { print {$fh} $options{ stdin }; close $fh; } elsif( $options{ skip }) { for( 1..$options{ skip }) { # Read that many lines local $/ = "\n"; scalar <$fh>; }; }; $fh; } sub archive_or_temp { my( $self ) = @_; if( ! defined $self->{archivename} ) { $self->{is_tempfile} = 1; (my( $fh ),$self->{archivename}) = tempfile( SUFFIX => ".$self->{type}" ); close $fh; unlink $self->{archivename}; }; $self->{archivename} }; sub wait { my( $self, $fh, %options ) = @_; while( <$fh>) { warn $_ if ($options{ verbose } || $self->{verbose}) }; } =head2 C<< $ar->add_scalar >> $ar->add_scalar( "Some name.txt", "This is the content" ); Adds a scalar as an archive member. Unfortunately, 7zip doesn't reliably read archive members from STDIN, so the scalar will be written to a tempfile, added to the archive and then renamed in the archive. This requires 7zip version 9.30+ =cut sub add_scalar { my( $self, $name, $scalar )= @_; # 7zip doesn't really support reading archive members from STDIN :-( my($fh, $tempname) = tempfile; binmode $fh, ':raw'; print {$fh} $scalar; close $fh; # Only supports 7z archive type?! # 7zip will magically append .7z to the filename :-( my $cmd = $self->get_command( command => 'a', archivename => $self->archive_or_temp, members => [$tempname], #options => ], ); $fh = $self->run( $cmd ); $self->wait($fh); unlink $tempname or warn "Couldn't unlink '$tempname': $!"; # Hopefully your version of 7zip can rename members (9.30+): $cmd = $self->get_command( command => 'rn', archivename => $self->archive_or_temp, members => [basename($tempname), $name], #options => ], ); $fh = $self->run( $cmd ); $self->wait($fh); # Once 7zip supports reading from stdin, this will work again: #my $fh = $self->run( $cmd, # binmode => ':raw', # stdin => $scalar, # verbose => 1, #); }; =head2 C<< $ar->add_directory >> $ar->add_directory( "real_etc", "etc" ); Adds an empty directory This currently ignores the directory date and time if the directory exists =cut sub add_directory { my( $self, $localname, $target )= @_; $target ||= $localname; # Create an empty directory, add it to the archive, # then rename that temp name to the wanted name: my $tempname = tempdir; my $cmd = $self->get_command( command => 'a', archivename => $self->archive_or_temp, members => [$tempname], options => ['-r0'], ); my $fh = $self->run( $cmd ); $self->wait($fh); # Hopefully your version of 7zip can rename members (9.30+): $cmd = $self->get_command( command => 'rn', archivename => $self->archive_or_temp, members => [basename($tempname), $target], ); $fh = $self->run( $cmd ); $self->wait($fh); # Once 7zip supports reading from stdin, this will work again: #my $fh = $self->run( $cmd, # binmode => ':raw', # stdin => $scalar, # verbose => 1, #); }; sub add { my( $self, %options )= @_; my @items = @{ delete $options{ items } || [] }; # Split up the list into one batch for the listfiles # and the list of files we need to rename my @filelist; for my $item (@items) { if( ! ref $item ) { $item = [ $item, $item ]; }; my( $name, $storedName ) = @$item; if( $name ne $storedName ) { # We need to pipe to 7zip from stdin (no, we don't, we can rename afterwards) # This still means we might overwrite an already existing file in the archive... # But 7-zip seems to not like duplicate filenames anyway in "@"-listfiles... my $cmd = $self->get_command( command => 'a', archivename => $self->archive_or_temp, members => [$name], #options => ], ); my $fh = $self->run( $cmd ); $self->wait($fh, %options ); $cmd = $self->get_command( command => 'rn', archivename => $self->archive_or_temp, members => [$name, $storedName], #options => ], ); $fh = $self->run( $cmd ); $self->wait($fh, %options ); } else { # 7zip can read the file from disk # Write the name to a tempfile to be read by 7zip for batching push @filelist, $name; }; }; if( @filelist ) { my( $fh, $name) = tempfile; binmode $fh, ':raw'; print {$fh} join "\r\n", @filelist; close $fh; my @options; if( $options{ recursive }) { push @options, '-r'; }; my $cmd = $self->get_command( command => 'a', archivename => $self->archive_or_temp, members => ['@'.$name], options => \@options ); $fh = $self->run( $cmd ); $self->wait($fh, %options); }; }; sub archiveZipApi { my( $class, %options ) = @_; require Archive::SevenZip::API::ArchiveZip; Archive::SevenZip::API::ArchiveZip->new( %options ) } package Path::Class::Archive::Handle; use strict; =head1 NAME Path::Class::Archive - treat archives as directories =cut package Path::Class::Archive; 1; __END__ =head1 CAUTION This module tries to mimic the API of L in some cases and in other cases, the API of L. It is also a very rough draft that just happens to be doing what I need, mostly extracting files. =head1 SEE ALSO L - also supports unpacking from 7z archives L - uncompressor for the LZMA compression method used by 7z =head1 REPOSITORY The public repository of this module is L. =head1 SUPPORT The public support forum of this module is L. =head1 BUG TRACKER Please report bugs in this module via the RT CPAN bug queue at L or via mail to L. =head1 AUTHOR Max Maischein C =head1 COPYRIGHT (c) Copyright 2015-2016 by Max Maischein C. =head1 LICENSE This module is released under the same terms as Perl itself. =cut