commit ee1dc9fb9a1a330b0661e4d807125859ee0c0064 Author: Mario Fetka Date: Fri Sep 15 14:26:26 2017 +0200 Imported Upstream version 1.10 diff --git a/Build.PL b/Build.PL new file mode 100644 index 0000000..fdbd1a9 --- /dev/null +++ b/Build.PL @@ -0,0 +1 @@ +require 'Makefile.PL'; diff --git a/Changes b/Changes new file mode 100644 index 0000000..93b2145 --- /dev/null +++ b/Changes @@ -0,0 +1,82 @@ +Revision history for Perl extension WWW::Google::SiteMap. + +1.09 + - Replaced %F with %Y-%m-%d in strftime calls, to correct a Solaris bug + (since Solaris strftime doesn't know about %F). Reproted by Fredrik + Acosta. + - Added more information to error messages when parsing an existing + sitemap fails. Reported by Vinko Vrsalovic Bolte. + +1.08 + - Converted installer from ExtUtils::MakeMaker to Module::Install, which + should make it easier to install for Windows users. + - Fixed a uri-encoding problem in WWW::Google::SiteMap::URL, reported + by Jeff Horn. + - Added some additional test cases and cleaned up some test cases. + +1.07 + - Added changes for 1.06 to Changes file, which were accidentally left + out of the last release. + - Fixed the WWW-Google-SiteMap-Robot.t test to skip testing if + WWW::Mechanize is not installed, reported by Jan Kratochvil. + +1.06 + - Correction to documentation for WWW::Google::SiteMap::Robot + ($robot->start should be $robot->run), reported by Martin Kissner. + - Adjusted WWW::Google::SiteMap::Robot to discard anchors from URLs, + reported by Martin Kissner. + +1.05 + - Removed an accidental prerequisite from WWW::Google::SiteMap::URL. You + can give it DateTime objects to set the lastmod option, but you don't + have to, there are other ways to set it as well. + +1.04 + - Fixed a documentation error in WWW::Google::SiteMap, and a problem with + the XML headers generated for sitemap indexes, both reported by + Michael Smith. + - Use 'use vars' instead of 'our' for older versions of perl. + +1.03 + - Removed some leftover debugging output from WWW::Google::SiteMap::Robot. + - Fixed date generation to deal with strangeness in the dates that Google + will accept. Note that this means some date/time strings that were + previously accepted may now fail. See L for + details of the acceptable values to lastmod(). + - Added some more tests. + +1.02 + - Fix a problem caused by the switch to XML::Twig, which was leaving off + some of the XML header information. Reported by Olaf Anders. + - Fixed a bug with WWW::Google::SiteMap::Robot, which forgot to load the + WWW::Google::SiteMap::Ping module before trying to send pings. + +1.01 + - Accidentally forgot to include the new WWW::Google::SiteMap::Robot + class, which helps build sitemaps by spidering your web site. + +1.00 + - Version 1.00 Released! + - Fixed Zlib detection problem reported by Lance Cleveland. + - Check to make sure that the sitemap file was opened correctly, rather + than just crashing when we try to write to it, also reported by + Lance Cleveland. + - Added support for sitemap indexes (see WWW::Google::SiteMap::Index) + - Added support for notifying Google when your sitemaps and sitemap + indexes are updated (see WWW::Google::SiteMap::Ping). Suggested by + Frank Naude. + - Fixed a bug in the ISO-8601 time format checking. + +0.03 + - Changed from XML::Simple to XML::Twig for XML parsing/generating, this + means you can now validate your sitemaps with an XML validator. + - Fixed some documentation errors, spotted by Ing. Branislav Gerzo + +0.02 + - Renamed from Google::SiteMap to WWW::Google::SiteMap, shouldn't have + created a new top-level namespace in the first place. + +0.01 + - original version; created by h2xs 1.23 with options + -X Google::SiteMap + diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..e972183 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,22 @@ +Build.PL +Changes +inc/Module/AutoInstall.pm +inc/Module/Install.pm +inc/Module/Install/AutoInstall.pm +inc/Module/Install/Base.pm +inc/Module/Install/Can.pm +inc/Module/Install/Fetch.pm +inc/Module/Install/Include.pm +inc/Module/Install/Makefile.pm +inc/Module/Install/Metadata.pm +inc/Module/Install/Win32.pm +inc/Module/Install/WriteAll.pm +lib/WWW/Google/SiteMap.pm +lib/WWW/Google/SiteMap/Index.pm +lib/WWW/Google/SiteMap/Ping.pm +lib/WWW/Google/SiteMap/Robot.pm +lib/WWW/Google/SiteMap/URL.pm +Makefile.PL +MANIFEST This list of files +META.yml +README diff --git a/META.yml b/META.yml new file mode 100644 index 0000000..cfc29b1 --- /dev/null +++ b/META.yml @@ -0,0 +1,12 @@ +# http://module-build.sourceforge.net/META-spec.html +#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# +name: WWW-Google-SiteMap +version: 1.06 +version_from: lib/WWW/Google/SiteMap.pm +installdirs: site +requires: + IO::File: 0 + XML::Twig: 0 + +distribution_type: module +generated_by: ExtUtils::MakeMaker version 6.17 diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..b10ef23 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,19 @@ +use inc::Module::Install; + +name 'WWW-Google-SiteMap'; +license 'perl'; +all_from 'lib/WWW/Google/SiteMap.pm'; +requires 'XML::Twig'; +requires 'IO::File'; +requires 'Carp'; +requires 'POSIX'; +requires 'URI::Escape'; +recommends 'IO::Zlib'; +recommends 'LWP::UserAgent'; +recommends 'WWW::Mechanize'; +recommends 'WWW::RobotRules'; + +auto_install; +WriteAll; + +# TODO - Module::Signature diff --git a/README b/README new file mode 100644 index 0000000..fd77499 --- /dev/null +++ b/README @@ -0,0 +1,8 @@ +WWW-Google-SiteMap +================== + +WWW::Google::SiteMap is DEPRECATED + +Now that more search engines than just Google are supporting the Sitemap +protocol, the WWW::Google::SiteMap module has been renamed to Search::Sitemap + diff --git a/inc/Module/AutoInstall.pm b/inc/Module/AutoInstall.pm new file mode 100644 index 0000000..dfb8ef7 --- /dev/null +++ b/inc/Module/AutoInstall.pm @@ -0,0 +1,805 @@ +#line 1 +package Module::AutoInstall; + +use strict; +use Cwd (); +use ExtUtils::MakeMaker (); + +use vars qw{$VERSION}; +BEGIN { + $VERSION = '1.03'; +} + +# special map on pre-defined feature sets +my %FeatureMap = ( + '' => 'Core Features', # XXX: deprecated + '-core' => 'Core Features', +); + +# various lexical flags +my ( @Missing, @Existing, %DisabledTests, $UnderCPAN, $HasCPANPLUS ); +my ( + $Config, $CheckOnly, $SkipInstall, $AcceptDefault, $TestOnly, $AllDeps +); +my ( $PostambleActions, $PostambleUsed ); + +# See if it's a testing or non-interactive session +_accept_default( $ENV{AUTOMATED_TESTING} or ! -t STDIN ); +_init(); + +sub _accept_default { + $AcceptDefault = shift; +} + +sub missing_modules { + return @Missing; +} + +sub do_install { + __PACKAGE__->install( + [ + $Config + ? ( UNIVERSAL::isa( $Config, 'HASH' ) ? %{$Config} : @{$Config} ) + : () + ], + @Missing, + ); +} + +# initialize various flags, and/or perform install +sub _init { + foreach my $arg ( + @ARGV, + split( + /[\s\t]+/, + $ENV{PERL_AUTOINSTALL} || $ENV{PERL_EXTUTILS_AUTOINSTALL} || '' + ) + ) + { + if ( $arg =~ /^--config=(.*)$/ ) { + $Config = [ split( ',', $1 ) ]; + } + elsif ( $arg =~ /^--installdeps=(.*)$/ ) { + __PACKAGE__->install( $Config, @Missing = split( /,/, $1 ) ); + exit 0; + } + elsif ( $arg =~ /^--default(?:deps)?$/ ) { + $AcceptDefault = 1; + } + elsif ( $arg =~ /^--check(?:deps)?$/ ) { + $CheckOnly = 1; + } + elsif ( $arg =~ /^--skip(?:deps)?$/ ) { + $SkipInstall = 1; + } + elsif ( $arg =~ /^--test(?:only)?$/ ) { + $TestOnly = 1; + } + elsif ( $arg =~ /^--all(?:deps)?$/ ) { + $AllDeps = 1; + } + } +} + +# overrides MakeMaker's prompt() to automatically accept the default choice +sub _prompt { + goto &ExtUtils::MakeMaker::prompt unless $AcceptDefault; + + my ( $prompt, $default ) = @_; + my $y = ( $default =~ /^[Yy]/ ); + + print $prompt, ' [', ( $y ? 'Y' : 'y' ), '/', ( $y ? 'n' : 'N' ), '] '; + print "$default\n"; + return $default; +} + +# the workhorse +sub import { + my $class = shift; + my @args = @_ or return; + my $core_all; + + print "*** $class version " . $class->VERSION . "\n"; + print "*** Checking for Perl dependencies...\n"; + + my $cwd = Cwd::cwd(); + + $Config = []; + + my $maxlen = length( + ( + sort { length($b) <=> length($a) } + grep { /^[^\-]/ } + map { + ref($_) + ? ( ( ref($_) eq 'HASH' ) ? keys(%$_) : @{$_} ) + : '' + } + map { +{@args}->{$_} } + grep { /^[^\-]/ or /^-core$/i } keys %{ +{@args} } + )[0] + ); + + # We want to know if we're under CPAN early to avoid prompting, but + # if we aren't going to try and install anything anyway then skip the + # check entirely since we don't want to have to load (and configure) + # an old CPAN just for a cosmetic message + + $UnderCPAN = _check_lock(1) unless $SkipInstall; + + while ( my ( $feature, $modules ) = splice( @args, 0, 2 ) ) { + my ( @required, @tests, @skiptests ); + my $default = 1; + my $conflict = 0; + + if ( $feature =~ m/^-(\w+)$/ ) { + my $option = lc($1); + + # check for a newer version of myself + _update_to( $modules, @_ ) and return if $option eq 'version'; + + # sets CPAN configuration options + $Config = $modules if $option eq 'config'; + + # promote every features to core status + $core_all = ( $modules =~ /^all$/i ) and next + if $option eq 'core'; + + next unless $option eq 'core'; + } + + print "[" . ( $FeatureMap{ lc($feature) } || $feature ) . "]\n"; + + $modules = [ %{$modules} ] if UNIVERSAL::isa( $modules, 'HASH' ); + + unshift @$modules, -default => &{ shift(@$modules) } + if ( ref( $modules->[0] ) eq 'CODE' ); # XXX: bugward combatability + + while ( my ( $mod, $arg ) = splice( @$modules, 0, 2 ) ) { + if ( $mod =~ m/^-(\w+)$/ ) { + my $option = lc($1); + + $default = $arg if ( $option eq 'default' ); + $conflict = $arg if ( $option eq 'conflict' ); + @tests = @{$arg} if ( $option eq 'tests' ); + @skiptests = @{$arg} if ( $option eq 'skiptests' ); + + next; + } + + printf( "- %-${maxlen}s ...", $mod ); + + if ( $arg and $arg =~ /^\D/ ) { + unshift @$modules, $arg; + $arg = 0; + } + + # XXX: check for conflicts and uninstalls(!) them. + my $cur = _load($mod); + if (_version_cmp ($cur, $arg) >= 0) + { + print "loaded. ($cur" . ( $arg ? " >= $arg" : '' ) . ")\n"; + push @Existing, $mod => $arg; + $DisabledTests{$_} = 1 for map { glob($_) } @skiptests; + } + else { + if (not defined $cur) # indeed missing + { + print "missing." . ( $arg ? " (would need $arg)" : '' ) . "\n"; + } + else + { + # no need to check $arg as _version_cmp ($cur, undef) would satisfy >= above + print "too old. ($cur < $arg)\n"; + } + + push @required, $mod => $arg; + } + } + + next unless @required; + + my $mandatory = ( $feature eq '-core' or $core_all ); + + if ( + !$SkipInstall + and ( + $CheckOnly + or ($mandatory and $UnderCPAN) + or $AllDeps + or _prompt( + qq{==> Auto-install the } + . ( @required / 2 ) + . ( $mandatory ? ' mandatory' : ' optional' ) + . qq{ module(s) from CPAN?}, + $default ? 'y' : 'n', + ) =~ /^[Yy]/ + ) + ) + { + push( @Missing, @required ); + $DisabledTests{$_} = 1 for map { glob($_) } @skiptests; + } + + elsif ( !$SkipInstall + and $default + and $mandatory + and + _prompt( qq{==> The module(s) are mandatory! Really skip?}, 'n', ) + =~ /^[Nn]/ ) + { + push( @Missing, @required ); + $DisabledTests{$_} = 1 for map { glob($_) } @skiptests; + } + + else { + $DisabledTests{$_} = 1 for map { glob($_) } @tests; + } + } + + if ( @Missing and not( $CheckOnly or $UnderCPAN ) ) { + require Config; + print +"*** Dependencies will be installed the next time you type '$Config::Config{make}'.\n"; + + # make an educated guess of whether we'll need root permission. + print " (You may need to do that as the 'root' user.)\n" + if eval '$>'; + } + print "*** $class configuration finished.\n"; + + chdir $cwd; + + # import to main:: + no strict 'refs'; + *{'main::WriteMakefile'} = \&Write if caller(0) eq 'main'; +} + +sub _running_under { + my $thing = shift; + print <<"END_MESSAGE"; +*** Since we're running under ${thing}, I'll just let it take care + of the dependency's installation later. +END_MESSAGE + return 1; +} + +# Check to see if we are currently running under CPAN.pm and/or CPANPLUS; +# if we are, then we simply let it taking care of our dependencies +sub _check_lock { + return unless @Missing or @_; + + my $cpan_env = $ENV{PERL5_CPAN_IS_RUNNING}; + + if ($ENV{PERL5_CPANPLUS_IS_RUNNING}) { + return _running_under($cpan_env ? 'CPAN' : 'CPANPLUS'); + } + + require CPAN; + + if ($CPAN::VERSION > '1.89') { + if ($cpan_env) { + return _running_under('CPAN'); + } + return; # CPAN.pm new enough, don't need to check further + } + + # last ditch attempt, this -will- configure CPAN, very sorry + + _load_cpan(1); # force initialize even though it's already loaded + + # Find the CPAN lock-file + my $lock = MM->catfile( $CPAN::Config->{cpan_home}, ".lock" ); + return unless -f $lock; + + # Check the lock + local *LOCK; + return unless open(LOCK, $lock); + + if ( + ( $^O eq 'MSWin32' ? _under_cpan() : == getppid() ) + and ( $CPAN::Config->{prerequisites_policy} || '' ) ne 'ignore' + ) { + print <<'END_MESSAGE'; + +*** Since we're running under CPAN, I'll just let it take care + of the dependency's installation later. +END_MESSAGE + return 1; + } + + close LOCK; + return; +} + +sub install { + my $class = shift; + + my $i; # used below to strip leading '-' from config keys + my @config = ( map { s/^-// if ++$i; $_ } @{ +shift } ); + + my ( @modules, @installed ); + while ( my ( $pkg, $ver ) = splice( @_, 0, 2 ) ) { + + # grep out those already installed + if ( _version_cmp( _load($pkg), $ver ) >= 0 ) { + push @installed, $pkg; + } + else { + push @modules, $pkg, $ver; + } + } + + return @installed unless @modules; # nothing to do + return @installed if _check_lock(); # defer to the CPAN shell + + print "*** Installing dependencies...\n"; + + return unless _connected_to('cpan.org'); + + my %args = @config; + my %failed; + local *FAILED; + if ( $args{do_once} and open( FAILED, '.#autoinstall.failed' ) ) { + while () { chomp; $failed{$_}++ } + close FAILED; + + my @newmod; + while ( my ( $k, $v ) = splice( @modules, 0, 2 ) ) { + push @newmod, ( $k => $v ) unless $failed{$k}; + } + @modules = @newmod; + } + + if ( _has_cpanplus() and not $ENV{PERL_AUTOINSTALL_PREFER_CPAN} ) { + _install_cpanplus( \@modules, \@config ); + } else { + _install_cpan( \@modules, \@config ); + } + + print "*** $class installation finished.\n"; + + # see if we have successfully installed them + while ( my ( $pkg, $ver ) = splice( @modules, 0, 2 ) ) { + if ( _version_cmp( _load($pkg), $ver ) >= 0 ) { + push @installed, $pkg; + } + elsif ( $args{do_once} and open( FAILED, '>> .#autoinstall.failed' ) ) { + print FAILED "$pkg\n"; + } + } + + close FAILED if $args{do_once}; + + return @installed; +} + +sub _install_cpanplus { + my @modules = @{ +shift }; + my @config = _cpanplus_config( @{ +shift } ); + my $installed = 0; + + require CPANPLUS::Backend; + my $cp = CPANPLUS::Backend->new; + my $conf = $cp->configure_object; + + return unless $conf->can('conf') # 0.05x+ with "sudo" support + or _can_write($conf->_get_build('base')); # 0.04x + + # if we're root, set UNINST=1 to avoid trouble unless user asked for it. + my $makeflags = $conf->get_conf('makeflags') || ''; + if ( UNIVERSAL::isa( $makeflags, 'HASH' ) ) { + # 0.03+ uses a hashref here + $makeflags->{UNINST} = 1 unless exists $makeflags->{UNINST}; + + } else { + # 0.02 and below uses a scalar + $makeflags = join( ' ', split( ' ', $makeflags ), 'UNINST=1' ) + if ( $makeflags !~ /\bUNINST\b/ and eval qq{ $> eq '0' } ); + + } + $conf->set_conf( makeflags => $makeflags ); + $conf->set_conf( prereqs => 1 ); + + + + while ( my ( $key, $val ) = splice( @config, 0, 2 ) ) { + $conf->set_conf( $key, $val ); + } + + my $modtree = $cp->module_tree; + while ( my ( $pkg, $ver ) = splice( @modules, 0, 2 ) ) { + print "*** Installing $pkg...\n"; + + MY::preinstall( $pkg, $ver ) or next if defined &MY::preinstall; + + my $success; + my $obj = $modtree->{$pkg}; + + if ( $obj and _version_cmp( $obj->{version}, $ver ) >= 0 ) { + my $pathname = $pkg; + $pathname =~ s/::/\\W/; + + foreach my $inc ( grep { m/$pathname.pm/i } keys(%INC) ) { + delete $INC{$inc}; + } + + my $rv = $cp->install( modules => [ $obj->{module} ] ); + + if ( $rv and ( $rv->{ $obj->{module} } or $rv->{ok} ) ) { + print "*** $pkg successfully installed.\n"; + $success = 1; + } else { + print "*** $pkg installation cancelled.\n"; + $success = 0; + } + + $installed += $success; + } else { + print << "."; +*** Could not find a version $ver or above for $pkg; skipping. +. + } + + MY::postinstall( $pkg, $ver, $success ) if defined &MY::postinstall; + } + + return $installed; +} + +sub _cpanplus_config { + my @config = (); + while ( @_ ) { + my ($key, $value) = (shift(), shift()); + if ( $key eq 'prerequisites_policy' ) { + if ( $value eq 'follow' ) { + $value = CPANPLUS::Internals::Constants::PREREQ_INSTALL(); + } elsif ( $value eq 'ask' ) { + $value = CPANPLUS::Internals::Constants::PREREQ_ASK(); + } elsif ( $value eq 'ignore' ) { + $value = CPANPLUS::Internals::Constants::PREREQ_IGNORE(); + } else { + die "*** Cannot convert option $key = '$value' to CPANPLUS version.\n"; + } + } else { + die "*** Cannot convert option $key to CPANPLUS version.\n"; + } + } + return @config; +} + +sub _install_cpan { + my @modules = @{ +shift }; + my @config = @{ +shift }; + my $installed = 0; + my %args; + + _load_cpan(); + require Config; + + if (CPAN->VERSION < 1.80) { + # no "sudo" support, probe for writableness + return unless _can_write( MM->catfile( $CPAN::Config->{cpan_home}, 'sources' ) ) + and _can_write( $Config::Config{sitelib} ); + } + + # if we're root, set UNINST=1 to avoid trouble unless user asked for it. + my $makeflags = $CPAN::Config->{make_install_arg} || ''; + $CPAN::Config->{make_install_arg} = + join( ' ', split( ' ', $makeflags ), 'UNINST=1' ) + if ( $makeflags !~ /\bUNINST\b/ and eval qq{ $> eq '0' } ); + + # don't show start-up info + $CPAN::Config->{inhibit_startup_message} = 1; + + # set additional options + while ( my ( $opt, $arg ) = splice( @config, 0, 2 ) ) { + ( $args{$opt} = $arg, next ) + if $opt =~ /^force$/; # pseudo-option + $CPAN::Config->{$opt} = $arg; + } + + local $CPAN::Config->{prerequisites_policy} = 'follow'; + + while ( my ( $pkg, $ver ) = splice( @modules, 0, 2 ) ) { + MY::preinstall( $pkg, $ver ) or next if defined &MY::preinstall; + + print "*** Installing $pkg...\n"; + + my $obj = CPAN::Shell->expand( Module => $pkg ); + my $success = 0; + + if ( $obj and _version_cmp( $obj->cpan_version, $ver ) >= 0 ) { + my $pathname = $pkg; + $pathname =~ s/::/\\W/; + + foreach my $inc ( grep { m/$pathname.pm/i } keys(%INC) ) { + delete $INC{$inc}; + } + + my $rv = $args{force} ? CPAN::Shell->force( install => $pkg ) + : CPAN::Shell->install($pkg); + $rv ||= eval { + $CPAN::META->instance( 'CPAN::Distribution', $obj->cpan_file, ) + ->{install} + if $CPAN::META; + }; + + if ( $rv eq 'YES' ) { + print "*** $pkg successfully installed.\n"; + $success = 1; + } + else { + print "*** $pkg installation failed.\n"; + $success = 0; + } + + $installed += $success; + } + else { + print << "."; +*** Could not find a version $ver or above for $pkg; skipping. +. + } + + MY::postinstall( $pkg, $ver, $success ) if defined &MY::postinstall; + } + + return $installed; +} + +sub _has_cpanplus { + return ( + $HasCPANPLUS = ( + $INC{'CPANPLUS/Config.pm'} + or _load('CPANPLUS::Shell::Default') + ) + ); +} + +# make guesses on whether we're under the CPAN installation directory +sub _under_cpan { + require Cwd; + require File::Spec; + + my $cwd = File::Spec->canonpath( Cwd::cwd() ); + my $cpan = File::Spec->canonpath( $CPAN::Config->{cpan_home} ); + + return ( index( $cwd, $cpan ) > -1 ); +} + +sub _update_to { + my $class = __PACKAGE__; + my $ver = shift; + + return + if _version_cmp( _load($class), $ver ) >= 0; # no need to upgrade + + if ( + _prompt( "==> A newer version of $class ($ver) is required. Install?", + 'y' ) =~ /^[Nn]/ + ) + { + die "*** Please install $class $ver manually.\n"; + } + + print << "."; +*** Trying to fetch it from CPAN... +. + + # install ourselves + _load($class) and return $class->import(@_) + if $class->install( [], $class, $ver ); + + print << '.'; exit 1; + +*** Cannot bootstrap myself. :-( Installation terminated. +. +} + +# check if we're connected to some host, using inet_aton +sub _connected_to { + my $site = shift; + + return ( + ( _load('Socket') and Socket::inet_aton($site) ) or _prompt( + qq( +*** Your host cannot resolve the domain name '$site', which + probably means the Internet connections are unavailable. +==> Should we try to install the required module(s) anyway?), 'n' + ) =~ /^[Yy]/ + ); +} + +# check if a directory is writable; may create it on demand +sub _can_write { + my $path = shift; + mkdir( $path, 0755 ) unless -e $path; + + return 1 if -w $path; + + print << "."; +*** You are not allowed to write to the directory '$path'; + the installation may fail due to insufficient permissions. +. + + if ( + eval '$>' and lc(`sudo -V`) =~ /version/ and _prompt( + qq( +==> Should we try to re-execute the autoinstall process with 'sudo'?), + ((-t STDIN) ? 'y' : 'n') + ) =~ /^[Yy]/ + ) + { + + # try to bootstrap ourselves from sudo + print << "."; +*** Trying to re-execute the autoinstall process with 'sudo'... +. + my $missing = join( ',', @Missing ); + my $config = join( ',', + UNIVERSAL::isa( $Config, 'HASH' ) ? %{$Config} : @{$Config} ) + if $Config; + + return + unless system( 'sudo', $^X, $0, "--config=$config", + "--installdeps=$missing" ); + + print << "."; +*** The 'sudo' command exited with error! Resuming... +. + } + + return _prompt( + qq( +==> Should we try to install the required module(s) anyway?), 'n' + ) =~ /^[Yy]/; +} + +# load a module and return the version it reports +sub _load { + my $mod = pop; # class/instance doesn't matter + my $file = $mod; + + $file =~ s|::|/|g; + $file .= '.pm'; + + local $@; + return eval { require $file; $mod->VERSION } || ( $@ ? undef: 0 ); +} + +# Load CPAN.pm and it's configuration +sub _load_cpan { + return if $CPAN::VERSION and $CPAN::Config and not @_; + require CPAN; + if ( $CPAN::HandleConfig::VERSION ) { + # Newer versions of CPAN have a HandleConfig module + CPAN::HandleConfig->load; + } else { + # Older versions had the load method in Config directly + CPAN::Config->load; + } +} + +# compare two versions, either use Sort::Versions or plain comparison +# return values same as <=> +sub _version_cmp { + my ( $cur, $min ) = @_; + return -1 unless defined $cur; # if 0 keep comparing + return 1 unless $min; + + $cur =~ s/\s+$//; + + # check for version numbers that are not in decimal format + if ( ref($cur) or ref($min) or $cur =~ /v|\..*\./ or $min =~ /v|\..*\./ ) { + if ( ( $version::VERSION or defined( _load('version') )) and + version->can('new') + ) { + + # use version.pm if it is installed. + return version->new($cur) <=> version->new($min); + } + elsif ( $Sort::Versions::VERSION or defined( _load('Sort::Versions') ) ) + { + + # use Sort::Versions as the sorting algorithm for a.b.c versions + return Sort::Versions::versioncmp( $cur, $min ); + } + + warn "Cannot reliably compare non-decimal formatted versions.\n" + . "Please install version.pm or Sort::Versions.\n"; + } + + # plain comparison + local $^W = 0; # shuts off 'not numeric' bugs + return $cur <=> $min; +} + +# nothing; this usage is deprecated. +sub main::PREREQ_PM { return {}; } + +sub _make_args { + my %args = @_; + + $args{PREREQ_PM} = { %{ $args{PREREQ_PM} || {} }, @Existing, @Missing } + if $UnderCPAN or $TestOnly; + + if ( $args{EXE_FILES} and -e 'MANIFEST' ) { + require ExtUtils::Manifest; + my $manifest = ExtUtils::Manifest::maniread('MANIFEST'); + + $args{EXE_FILES} = + [ grep { exists $manifest->{$_} } @{ $args{EXE_FILES} } ]; + } + + $args{test}{TESTS} ||= 't/*.t'; + $args{test}{TESTS} = join( ' ', + grep { !exists( $DisabledTests{$_} ) } + map { glob($_) } split( /\s+/, $args{test}{TESTS} ) ); + + my $missing = join( ',', @Missing ); + my $config = + join( ',', UNIVERSAL::isa( $Config, 'HASH' ) ? %{$Config} : @{$Config} ) + if $Config; + + $PostambleActions = ( + ($missing and not $UnderCPAN) + ? "\$(PERL) $0 --config=$config --installdeps=$missing" + : "\$(NOECHO) \$(NOOP)" + ); + + return %args; +} + +# a wrapper to ExtUtils::MakeMaker::WriteMakefile +sub Write { + require Carp; + Carp::croak "WriteMakefile: Need even number of args" if @_ % 2; + + if ($CheckOnly) { + print << "."; +*** Makefile not written in check-only mode. +. + return; + } + + my %args = _make_args(@_); + + no strict 'refs'; + + $PostambleUsed = 0; + local *MY::postamble = \&postamble unless defined &MY::postamble; + ExtUtils::MakeMaker::WriteMakefile(%args); + + print << "." unless $PostambleUsed; +*** WARNING: Makefile written with customized MY::postamble() without + including contents from Module::AutoInstall::postamble() -- + auto installation features disabled. Please contact the author. +. + + return 1; +} + +sub postamble { + $PostambleUsed = 1; + + return <<"END_MAKE"; + +config :: installdeps +\t\$(NOECHO) \$(NOOP) + +checkdeps :: +\t\$(PERL) $0 --checkdeps + +installdeps :: +\t$PostambleActions + +END_MAKE + +} + +1; + +__END__ + +#line 1056 diff --git a/inc/Module/Install.pm b/inc/Module/Install.pm new file mode 100644 index 0000000..51eda5d --- /dev/null +++ b/inc/Module/Install.pm @@ -0,0 +1,430 @@ +#line 1 +package Module::Install; + +# For any maintainers: +# The load order for Module::Install is a bit magic. +# It goes something like this... +# +# IF ( host has Module::Install installed, creating author mode ) { +# 1. Makefile.PL calls "use inc::Module::Install" +# 2. $INC{inc/Module/Install.pm} set to installed version of inc::Module::Install +# 3. The installed version of inc::Module::Install loads +# 4. inc::Module::Install calls "require Module::Install" +# 5. The ./inc/ version of Module::Install loads +# } ELSE { +# 1. Makefile.PL calls "use inc::Module::Install" +# 2. $INC{inc/Module/Install.pm} set to ./inc/ version of Module::Install +# 3. The ./inc/ version of Module::Install loads +# } + +use 5.005; +use strict 'vars'; + +use vars qw{$VERSION $MAIN}; +BEGIN { + # All Module::Install core packages now require synchronised versions. + # This will be used to ensure we don't accidentally load old or + # different versions of modules. + # This is not enforced yet, but will be some time in the next few + # releases once we can make sure it won't clash with custom + # Module::Install extensions. + $VERSION = '0.91'; + + # Storage for the pseudo-singleton + $MAIN = undef; + + *inc::Module::Install::VERSION = *VERSION; + @inc::Module::Install::ISA = __PACKAGE__; + +} + + + + + +# Whether or not inc::Module::Install is actually loaded, the +# $INC{inc/Module/Install.pm} is what will still get set as long as +# the caller loaded module this in the documented manner. +# If not set, the caller may NOT have loaded the bundled version, and thus +# they may not have a MI version that works with the Makefile.PL. This would +# result in false errors or unexpected behaviour. And we don't want that. +my $file = join( '/', 'inc', split /::/, __PACKAGE__ ) . '.pm'; +unless ( $INC{$file} ) { die <<"END_DIE" } + +Please invoke ${\__PACKAGE__} with: + + use inc::${\__PACKAGE__}; + +not: + + use ${\__PACKAGE__}; + +END_DIE + + + + + +# If the script that is loading Module::Install is from the future, +# then make will detect this and cause it to re-run over and over +# again. This is bad. Rather than taking action to touch it (which +# is unreliable on some platforms and requires write permissions) +# for now we should catch this and refuse to run. +if ( -f $0 ) { + my $s = (stat($0))[9]; + + # If the modification time is only slightly in the future, + # sleep briefly to remove the problem. + my $a = $s - time; + if ( $a > 0 and $a < 5 ) { sleep 5 } + + # Too far in the future, throw an error. + my $t = time; + if ( $s > $t ) { die <<"END_DIE" } + +Your installer $0 has a modification time in the future ($s > $t). + +This is known to create infinite loops in make. + +Please correct this, then run $0 again. + +END_DIE +} + + + + + +# Build.PL was formerly supported, but no longer is due to excessive +# difficulty in implementing every single feature twice. +if ( $0 =~ /Build.PL$/i ) { die <<"END_DIE" } + +Module::Install no longer supports Build.PL. + +It was impossible to maintain duel backends, and has been deprecated. + +Please remove all Build.PL files and only use the Makefile.PL installer. + +END_DIE + + + + + +# To save some more typing in Module::Install installers, every... +# use inc::Module::Install +# ...also acts as an implicit use strict. +$^H |= strict::bits(qw(refs subs vars)); + + + + + +use Cwd (); +use File::Find (); +use File::Path (); +use FindBin; + +sub autoload { + my $self = shift; + my $who = $self->_caller; + my $cwd = Cwd::cwd(); + my $sym = "${who}::AUTOLOAD"; + $sym->{$cwd} = sub { + my $pwd = Cwd::cwd(); + if ( my $code = $sym->{$pwd} ) { + # Delegate back to parent dirs + goto &$code unless $cwd eq $pwd; + } + $$sym =~ /([^:]+)$/ or die "Cannot autoload $who - $sym"; + my $method = $1; + if ( uc($method) eq $method ) { + # Do nothing + return; + } elsif ( $method =~ /^_/ and $self->can($method) ) { + # Dispatch to the root M:I class + return $self->$method(@_); + } + + # Dispatch to the appropriate plugin + unshift @_, ( $self, $1 ); + goto &{$self->can('call')}; + }; +} + +sub import { + my $class = shift; + my $self = $class->new(@_); + my $who = $self->_caller; + + unless ( -f $self->{file} ) { + require "$self->{path}/$self->{dispatch}.pm"; + File::Path::mkpath("$self->{prefix}/$self->{author}"); + $self->{admin} = "$self->{name}::$self->{dispatch}"->new( _top => $self ); + $self->{admin}->init; + @_ = ($class, _self => $self); + goto &{"$self->{name}::import"}; + } + + *{"${who}::AUTOLOAD"} = $self->autoload; + $self->preload; + + # Unregister loader and worker packages so subdirs can use them again + delete $INC{"$self->{file}"}; + delete $INC{"$self->{path}.pm"}; + + # Save to the singleton + $MAIN = $self; + + return 1; +} + +sub preload { + my $self = shift; + unless ( $self->{extensions} ) { + $self->load_extensions( + "$self->{prefix}/$self->{path}", $self + ); + } + + my @exts = @{$self->{extensions}}; + unless ( @exts ) { + @exts = $self->{admin}->load_all_extensions; + } + + my %seen; + foreach my $obj ( @exts ) { + while (my ($method, $glob) = each %{ref($obj) . '::'}) { + next unless $obj->can($method); + next if $method =~ /^_/; + next if $method eq uc($method); + $seen{$method}++; + } + } + + my $who = $self->_caller; + foreach my $name ( sort keys %seen ) { + *{"${who}::$name"} = sub { + ${"${who}::AUTOLOAD"} = "${who}::$name"; + goto &{"${who}::AUTOLOAD"}; + }; + } +} + +sub new { + my ($class, %args) = @_; + + # ignore the prefix on extension modules built from top level. + my $base_path = Cwd::abs_path($FindBin::Bin); + unless ( Cwd::abs_path(Cwd::cwd()) eq $base_path ) { + delete $args{prefix}; + } + + return $args{_self} if $args{_self}; + + $args{dispatch} ||= 'Admin'; + $args{prefix} ||= 'inc'; + $args{author} ||= ($^O eq 'VMS' ? '_author' : '.author'); + $args{bundle} ||= 'inc/BUNDLES'; + $args{base} ||= $base_path; + $class =~ s/^\Q$args{prefix}\E:://; + $args{name} ||= $class; + $args{version} ||= $class->VERSION; + unless ( $args{path} ) { + $args{path} = $args{name}; + $args{path} =~ s!::!/!g; + } + $args{file} ||= "$args{base}/$args{prefix}/$args{path}.pm"; + $args{wrote} = 0; + + bless( \%args, $class ); +} + +sub call { + my ($self, $method) = @_; + my $obj = $self->load($method) or return; + splice(@_, 0, 2, $obj); + goto &{$obj->can($method)}; +} + +sub load { + my ($self, $method) = @_; + + $self->load_extensions( + "$self->{prefix}/$self->{path}", $self + ) unless $self->{extensions}; + + foreach my $obj (@{$self->{extensions}}) { + return $obj if $obj->can($method); + } + + my $admin = $self->{admin} or die <<"END_DIE"; +The '$method' method does not exist in the '$self->{prefix}' path! +Please remove the '$self->{prefix}' directory and run $0 again to load it. +END_DIE + + my $obj = $admin->load($method, 1); + push @{$self->{extensions}}, $obj; + + $obj; +} + +sub load_extensions { + my ($self, $path, $top) = @_; + + unless ( grep { ! ref $_ and lc $_ eq lc $self->{prefix} } @INC ) { + unshift @INC, $self->{prefix}; + } + + foreach my $rv ( $self->find_extensions($path) ) { + my ($file, $pkg) = @{$rv}; + next if $self->{pathnames}{$pkg}; + + local $@; + my $new = eval { require $file; $pkg->can('new') }; + unless ( $new ) { + warn $@ if $@; + next; + } + $self->{pathnames}{$pkg} = delete $INC{$file}; + push @{$self->{extensions}}, &{$new}($pkg, _top => $top ); + } + + $self->{extensions} ||= []; +} + +sub find_extensions { + my ($self, $path) = @_; + + my @found; + File::Find::find( sub { + my $file = $File::Find::name; + return unless $file =~ m!^\Q$path\E/(.+)\.pm\Z!is; + my $subpath = $1; + return if lc($subpath) eq lc($self->{dispatch}); + + $file = "$self->{path}/$subpath.pm"; + my $pkg = "$self->{name}::$subpath"; + $pkg =~ s!/!::!g; + + # If we have a mixed-case package name, assume case has been preserved + # correctly. Otherwise, root through the file to locate the case-preserved + # version of the package name. + if ( $subpath eq lc($subpath) || $subpath eq uc($subpath) ) { + my $content = Module::Install::_read($subpath . '.pm'); + my $in_pod = 0; + foreach ( split //, $content ) { + $in_pod = 1 if /^=\w/; + $in_pod = 0 if /^=cut/; + next if ($in_pod || /^=cut/); # skip pod text + next if /^\s*#/; # and comments + if ( m/^\s*package\s+($pkg)\s*;/i ) { + $pkg = $1; + last; + } + } + } + + push @found, [ $file, $pkg ]; + }, $path ) if -d $path; + + @found; +} + + + + + +##################################################################### +# Common Utility Functions + +sub _caller { + my $depth = 0; + my $call = caller($depth); + while ( $call eq __PACKAGE__ ) { + $depth++; + $call = caller($depth); + } + return $call; +} + +sub _read { + local *FH; + if ( $] >= 5.006 ) { + open( FH, '<', $_[0] ) or die "open($_[0]): $!"; + } else { + open( FH, "< $_[0]" ) or die "open($_[0]): $!"; + } + my $string = do { local $/; }; + close FH or die "close($_[0]): $!"; + return $string; +} + +sub _readperl { + my $string = Module::Install::_read($_[0]); + $string =~ s/(?:\015{1,2}\012|\015|\012)/\n/sg; + $string =~ s/(\n)\n*__(?:DATA|END)__\b.*\z/$1/s; + $string =~ s/\n\n=\w+.+?\n\n=cut\b.+?\n+/\n\n/sg; + return $string; +} + +sub _readpod { + my $string = Module::Install::_read($_[0]); + $string =~ s/(?:\015{1,2}\012|\015|\012)/\n/sg; + return $string if $_[0] =~ /\.pod\z/; + $string =~ s/(^|\n=cut\b.+?\n+)[^=\s].+?\n(\n=\w+|\z)/$1$2/sg; + $string =~ s/\n*=pod\b[^\n]*\n+/\n\n/sg; + $string =~ s/\n*=cut\b[^\n]*\n+/\n\n/sg; + $string =~ s/^\n+//s; + return $string; +} + +sub _write { + local *FH; + if ( $] >= 5.006 ) { + open( FH, '>', $_[0] ) or die "open($_[0]): $!"; + } else { + open( FH, "> $_[0]" ) or die "open($_[0]): $!"; + } + foreach ( 1 .. $#_ ) { + print FH $_[$_] or die "print($_[0]): $!"; + } + close FH or die "close($_[0]): $!"; +} + +# _version is for processing module versions (eg, 1.03_05) not +# Perl versions (eg, 5.8.1). +sub _version ($) { + my $s = shift || 0; + my $d =()= $s =~ /(\.)/g; + if ( $d >= 2 ) { + # Normalise multipart versions + $s =~ s/(\.)(\d{1,3})/sprintf("$1%03d",$2)/eg; + } + $s =~ s/^(\d+)\.?//; + my $l = $1 || 0; + my @v = map { + $_ . '0' x (3 - length $_) + } $s =~ /(\d{1,3})\D?/g; + $l = $l . '.' . join '', @v if @v; + return $l + 0; +} + +sub _cmp ($$) { + _version($_[0]) <=> _version($_[1]); +} + +# Cloned from Params::Util::_CLASS +sub _CLASS ($) { + ( + defined $_[0] + and + ! ref $_[0] + and + $_[0] =~ m/^[^\W\d]\w*(?:::\w+)*\z/s + ) ? $_[0] : undef; +} + +1; + +# Copyright 2008 - 2009 Adam Kennedy. diff --git a/inc/Module/Install/AutoInstall.pm b/inc/Module/Install/AutoInstall.pm new file mode 100644 index 0000000..58dd026 --- /dev/null +++ b/inc/Module/Install/AutoInstall.pm @@ -0,0 +1,61 @@ +#line 1 +package Module::Install::AutoInstall; + +use strict; +use Module::Install::Base (); + +use vars qw{$VERSION @ISA $ISCORE}; +BEGIN { + $VERSION = '0.91'; + @ISA = 'Module::Install::Base'; + $ISCORE = 1; +} + +sub AutoInstall { $_[0] } + +sub run { + my $self = shift; + $self->auto_install_now(@_); +} + +sub write { + my $self = shift; + $self->auto_install(@_); +} + +sub auto_install { + my $self = shift; + return if $self->{done}++; + + # Flatten array of arrays into a single array + my @core = map @$_, map @$_, grep ref, + $self->build_requires, $self->requires; + + my @config = @_; + + # We'll need Module::AutoInstall + $self->include('Module::AutoInstall'); + require Module::AutoInstall; + + Module::AutoInstall->import( + (@config ? (-config => \@config) : ()), + (@core ? (-core => \@core) : ()), + $self->features, + ); + + $self->makemaker_args( Module::AutoInstall::_make_args() ); + + my $class = ref($self); + $self->postamble( + "# --- $class section:\n" . + Module::AutoInstall::postamble() + ); +} + +sub auto_install_now { + my $self = shift; + $self->auto_install(@_); + Module::AutoInstall::do_install(); +} + +1; diff --git a/inc/Module/Install/Base.pm b/inc/Module/Install/Base.pm new file mode 100644 index 0000000..60a74d2 --- /dev/null +++ b/inc/Module/Install/Base.pm @@ -0,0 +1,78 @@ +#line 1 +package Module::Install::Base; + +use strict 'vars'; +use vars qw{$VERSION}; +BEGIN { + $VERSION = '0.91'; +} + +# Suspend handler for "redefined" warnings +BEGIN { + my $w = $SIG{__WARN__}; + $SIG{__WARN__} = sub { $w }; +} + +#line 42 + +sub new { + my $class = shift; + unless ( defined &{"${class}::call"} ) { + *{"${class}::call"} = sub { shift->_top->call(@_) }; + } + unless ( defined &{"${class}::load"} ) { + *{"${class}::load"} = sub { shift->_top->load(@_) }; + } + bless { @_ }, $class; +} + +#line 61 + +sub AUTOLOAD { + local $@; + my $func = eval { shift->_top->autoload } or return; + goto &$func; +} + +#line 75 + +sub _top { + $_[0]->{_top}; +} + +#line 90 + +sub admin { + $_[0]->_top->{admin} + or + Module::Install::Base::FakeAdmin->new; +} + +#line 106 + +sub is_admin { + $_[0]->admin->VERSION; +} + +sub DESTROY {} + +package Module::Install::Base::FakeAdmin; + +my $fake; + +sub new { + $fake ||= bless(\@_, $_[0]); +} + +sub AUTOLOAD {} + +sub DESTROY {} + +# Restore warning handler +BEGIN { + $SIG{__WARN__} = $SIG{__WARN__}->(); +} + +1; + +#line 154 diff --git a/inc/Module/Install/Can.pm b/inc/Module/Install/Can.pm new file mode 100644 index 0000000..e65e4f6 --- /dev/null +++ b/inc/Module/Install/Can.pm @@ -0,0 +1,81 @@ +#line 1 +package Module::Install::Can; + +use strict; +use Config (); +use File::Spec (); +use ExtUtils::MakeMaker (); +use Module::Install::Base (); + +use vars qw{$VERSION @ISA $ISCORE}; +BEGIN { + $VERSION = '0.91'; + @ISA = 'Module::Install::Base'; + $ISCORE = 1; +} + +# check if we can load some module +### Upgrade this to not have to load the module if possible +sub can_use { + my ($self, $mod, $ver) = @_; + $mod =~ s{::|\\}{/}g; + $mod .= '.pm' unless $mod =~ /\.pm$/i; + + my $pkg = $mod; + $pkg =~ s{/}{::}g; + $pkg =~ s{\.pm$}{}i; + + local $@; + eval { require $mod; $pkg->VERSION($ver || 0); 1 }; +} + +# check if we can run some command +sub can_run { + my ($self, $cmd) = @_; + + my $_cmd = $cmd; + return $_cmd if (-x $_cmd or $_cmd = MM->maybe_command($_cmd)); + + for my $dir ((split /$Config::Config{path_sep}/, $ENV{PATH}), '.') { + next if $dir eq ''; + my $abs = File::Spec->catfile($dir, $_[1]); + return $abs if (-x $abs or $abs = MM->maybe_command($abs)); + } + + return; +} + +# can we locate a (the) C compiler +sub can_cc { + my $self = shift; + my @chunks = split(/ /, $Config::Config{cc}) or return; + + # $Config{cc} may contain args; try to find out the program part + while (@chunks) { + return $self->can_run("@chunks") || (pop(@chunks), next); + } + + return; +} + +# Fix Cygwin bug on maybe_command(); +if ( $^O eq 'cygwin' ) { + require ExtUtils::MM_Cygwin; + require ExtUtils::MM_Win32; + if ( ! defined(&ExtUtils::MM_Cygwin::maybe_command) ) { + *ExtUtils::MM_Cygwin::maybe_command = sub { + my ($self, $file) = @_; + if ($file =~ m{^/cygdrive/}i and ExtUtils::MM_Win32->can('maybe_command')) { + ExtUtils::MM_Win32->maybe_command($file); + } else { + ExtUtils::MM_Unix->maybe_command($file); + } + } + } +} + +1; + +__END__ + +#line 156 diff --git a/inc/Module/Install/Fetch.pm b/inc/Module/Install/Fetch.pm new file mode 100644 index 0000000..05f2079 --- /dev/null +++ b/inc/Module/Install/Fetch.pm @@ -0,0 +1,93 @@ +#line 1 +package Module::Install::Fetch; + +use strict; +use Module::Install::Base (); + +use vars qw{$VERSION @ISA $ISCORE}; +BEGIN { + $VERSION = '0.91'; + @ISA = 'Module::Install::Base'; + $ISCORE = 1; +} + +sub get_file { + my ($self, %args) = @_; + my ($scheme, $host, $path, $file) = + $args{url} =~ m|^(\w+)://([^/]+)(.+)/(.+)| or return; + + if ( $scheme eq 'http' and ! eval { require LWP::Simple; 1 } ) { + $args{url} = $args{ftp_url} + or (warn("LWP support unavailable!\n"), return); + ($scheme, $host, $path, $file) = + $args{url} =~ m|^(\w+)://([^/]+)(.+)/(.+)| or return; + } + + $|++; + print "Fetching '$file' from $host... "; + + unless (eval { require Socket; Socket::inet_aton($host) }) { + warn "'$host' resolve failed!\n"; + return; + } + + return unless $scheme eq 'ftp' or $scheme eq 'http'; + + require Cwd; + my $dir = Cwd::getcwd(); + chdir $args{local_dir} or return if exists $args{local_dir}; + + if (eval { require LWP::Simple; 1 }) { + LWP::Simple::mirror($args{url}, $file); + } + elsif (eval { require Net::FTP; 1 }) { eval { + # use Net::FTP to get past firewall + my $ftp = Net::FTP->new($host, Passive => 1, Timeout => 600); + $ftp->login("anonymous", 'anonymous@example.com'); + $ftp->cwd($path); + $ftp->binary; + $ftp->get($file) or (warn("$!\n"), return); + $ftp->quit; + } } + elsif (my $ftp = $self->can_run('ftp')) { eval { + # no Net::FTP, fallback to ftp.exe + require FileHandle; + my $fh = FileHandle->new; + + local $SIG{CHLD} = 'IGNORE'; + unless ($fh->open("|$ftp -n")) { + warn "Couldn't open ftp: $!\n"; + chdir $dir; return; + } + + my @dialog = split(/\n/, <<"END_FTP"); +open $host +user anonymous anonymous\@example.com +cd $path +binary +get $file $file +quit +END_FTP + foreach (@dialog) { $fh->print("$_\n") } + $fh->close; + } } + else { + warn "No working 'ftp' program available!\n"; + chdir $dir; return; + } + + unless (-f $file) { + warn "Fetching failed: $@\n"; + chdir $dir; return; + } + + return if exists $args{size} and -s $file != $args{size}; + system($args{run}) if exists $args{run}; + unlink($file) if $args{remove}; + + print(((!exists $args{check_for} or -e $args{check_for}) + ? "done!" : "failed! ($!)"), "\n"); + chdir $dir; return !$?; +} + +1; diff --git a/inc/Module/Install/Include.pm b/inc/Module/Install/Include.pm new file mode 100644 index 0000000..7e792e0 --- /dev/null +++ b/inc/Module/Install/Include.pm @@ -0,0 +1,34 @@ +#line 1 +package Module::Install::Include; + +use strict; +use Module::Install::Base (); + +use vars qw{$VERSION @ISA $ISCORE}; +BEGIN { + $VERSION = '0.91'; + @ISA = 'Module::Install::Base'; + $ISCORE = 1; +} + +sub include { + shift()->admin->include(@_); +} + +sub include_deps { + shift()->admin->include_deps(@_); +} + +sub auto_include { + shift()->admin->auto_include(@_); +} + +sub auto_include_deps { + shift()->admin->auto_include_deps(@_); +} + +sub auto_include_dependent_dists { + shift()->admin->auto_include_dependent_dists(@_); +} + +1; diff --git a/inc/Module/Install/Makefile.pm b/inc/Module/Install/Makefile.pm new file mode 100644 index 0000000..98779db --- /dev/null +++ b/inc/Module/Install/Makefile.pm @@ -0,0 +1,268 @@ +#line 1 +package Module::Install::Makefile; + +use strict 'vars'; +use ExtUtils::MakeMaker (); +use Module::Install::Base (); + +use vars qw{$VERSION @ISA $ISCORE}; +BEGIN { + $VERSION = '0.91'; + @ISA = 'Module::Install::Base'; + $ISCORE = 1; +} + +sub Makefile { $_[0] } + +my %seen = (); + +sub prompt { + shift; + + # Infinite loop protection + my @c = caller(); + if ( ++$seen{"$c[1]|$c[2]|$_[0]"} > 3 ) { + die "Caught an potential prompt infinite loop ($c[1]|$c[2]|$_[0])"; + } + + # In automated testing, always use defaults + if ( $ENV{AUTOMATED_TESTING} and ! $ENV{PERL_MM_USE_DEFAULT} ) { + local $ENV{PERL_MM_USE_DEFAULT} = 1; + goto &ExtUtils::MakeMaker::prompt; + } else { + goto &ExtUtils::MakeMaker::prompt; + } +} + +sub makemaker_args { + my $self = shift; + my $args = ( $self->{makemaker_args} ||= {} ); + %$args = ( %$args, @_ ); + return $args; +} + +# For mm args that take multiple space-seperated args, +# append an argument to the current list. +sub makemaker_append { + my $self = sShift; + my $name = shift; + my $args = $self->makemaker_args; + $args->{name} = defined $args->{$name} + ? join( ' ', $args->{name}, @_ ) + : join( ' ', @_ ); +} + +sub build_subdirs { + my $self = shift; + my $subdirs = $self->makemaker_args->{DIR} ||= []; + for my $subdir (@_) { + push @$subdirs, $subdir; + } +} + +sub clean_files { + my $self = shift; + my $clean = $self->makemaker_args->{clean} ||= {}; + %$clean = ( + %$clean, + FILES => join ' ', grep { length $_ } ($clean->{FILES} || (), @_), + ); +} + +sub realclean_files { + my $self = shift; + my $realclean = $self->makemaker_args->{realclean} ||= {}; + %$realclean = ( + %$realclean, + FILES => join ' ', grep { length $_ } ($realclean->{FILES} || (), @_), + ); +} + +sub libs { + my $self = shift; + my $libs = ref $_[0] ? shift : [ shift ]; + $self->makemaker_args( LIBS => $libs ); +} + +sub inc { + my $self = shift; + $self->makemaker_args( INC => shift ); +} + +my %test_dir = (); + +sub _wanted_t { + /\.t$/ and -f $_ and $test_dir{$File::Find::dir} = 1; +} + +sub tests_recursive { + my $self = shift; + if ( $self->tests ) { + die "tests_recursive will not work if tests are already defined"; + } + my $dir = shift || 't'; + unless ( -d $dir ) { + die "tests_recursive dir '$dir' does not exist"; + } + %test_dir = (); + require File::Find; + File::Find::find( \&_wanted_t, $dir ); + $self->tests( join ' ', map { "$_/*.t" } sort keys %test_dir ); +} + +sub write { + my $self = shift; + die "&Makefile->write() takes no arguments\n" if @_; + + # Check the current Perl version + my $perl_version = $self->perl_version; + if ( $perl_version ) { + eval "use $perl_version; 1" + or die "ERROR: perl: Version $] is installed, " + . "but we need version >= $perl_version"; + } + + # Make sure we have a new enough MakeMaker + require ExtUtils::MakeMaker; + + if ( $perl_version and $self->_cmp($perl_version, '5.006') >= 0 ) { + # MakeMaker can complain about module versions that include + # an underscore, even though its own version may contain one! + # Hence the funny regexp to get rid of it. See RT #35800 + # for details. + $self->build_requires( 'ExtUtils::MakeMaker' => $ExtUtils::MakeMaker::VERSION =~ /^(\d+\.\d+)/ ); + $self->configure_requires( 'ExtUtils::MakeMaker' => $ExtUtils::MakeMaker::VERSION =~ /^(\d+\.\d+)/ ); + } else { + # Allow legacy-compatibility with 5.005 by depending on the + # most recent EU:MM that supported 5.005. + $self->build_requires( 'ExtUtils::MakeMaker' => 6.42 ); + $self->configure_requires( 'ExtUtils::MakeMaker' => 6.42 ); + } + + # Generate the MakeMaker params + my $args = $self->makemaker_args; + $args->{DISTNAME} = $self->name; + $args->{NAME} = $self->module_name || $self->name; + $args->{VERSION} = $self->version; + $args->{NAME} =~ s/-/::/g; + if ( $self->tests ) { + $args->{test} = { TESTS => $self->tests }; + } + if ( $] >= 5.005 ) { + $args->{ABSTRACT} = $self->abstract; + $args->{AUTHOR} = $self->author; + } + if ( eval($ExtUtils::MakeMaker::VERSION) >= 6.10 ) { + $args->{NO_META} = 1; + } + if ( eval($ExtUtils::MakeMaker::VERSION) > 6.17 and $self->sign ) { + $args->{SIGN} = 1; + } + unless ( $self->is_admin ) { + delete $args->{SIGN}; + } + + # Merge both kinds of requires into prereq_pm + my $prereq = ($args->{PREREQ_PM} ||= {}); + %$prereq = ( %$prereq, + map { @$_ } + map { @$_ } + grep $_, + ($self->configure_requires, $self->build_requires, $self->requires) + ); + + # Remove any reference to perl, PREREQ_PM doesn't support it + delete $args->{PREREQ_PM}->{perl}; + + # merge both kinds of requires into prereq_pm + my $subdirs = ($args->{DIR} ||= []); + if ($self->bundles) { + foreach my $bundle (@{ $self->bundles }) { + my ($file, $dir) = @$bundle; + push @$subdirs, $dir if -d $dir; + delete $prereq->{$file}; + } + } + + if ( my $perl_version = $self->perl_version ) { + eval "use $perl_version; 1" + or die "ERROR: perl: Version $] is installed, " + . "but we need version >= $perl_version"; + } + + $args->{INSTALLDIRS} = $self->installdirs; + + my %args = map { ( $_ => $args->{$_} ) } grep {defined($args->{$_})} keys %$args; + + my $user_preop = delete $args{dist}->{PREOP}; + if (my $preop = $self->admin->preop($user_preop)) { + foreach my $key ( keys %$preop ) { + $args{dist}->{$key} = $preop->{$key}; + } + } + + my $mm = ExtUtils::MakeMaker::WriteMakefile(%args); + $self->fix_up_makefile($mm->{FIRST_MAKEFILE} || 'Makefile'); +} + +sub fix_up_makefile { + my $self = shift; + my $makefile_name = shift; + my $top_class = ref($self->_top) || ''; + my $top_version = $self->_top->VERSION || ''; + + my $preamble = $self->preamble + ? "# Preamble by $top_class $top_version\n" + . $self->preamble + : ''; + my $postamble = "# Postamble by $top_class $top_version\n" + . ($self->postamble || ''); + + local *MAKEFILE; + open MAKEFILE, "< $makefile_name" or die "fix_up_makefile: Couldn't open $makefile_name: $!"; + my $makefile = do { local $/; }; + close MAKEFILE or die $!; + + $makefile =~ s/\b(test_harness\(\$\(TEST_VERBOSE\), )/$1'inc', /; + $makefile =~ s/( -I\$\(INST_ARCHLIB\))/ -Iinc$1/g; + $makefile =~ s/( "-I\$\(INST_LIB\)")/ "-Iinc"$1/g; + $makefile =~ s/^(FULLPERL = .*)/$1 "-Iinc"/m; + $makefile =~ s/^(PERL = .*)/$1 "-Iinc"/m; + + # Module::Install will never be used to build the Core Perl + # Sometimes PERL_LIB and PERL_ARCHLIB get written anyway, which breaks + # PREFIX/PERL5LIB, and thus, install_share. Blank them if they exist + $makefile =~ s/^PERL_LIB = .+/PERL_LIB =/m; + #$makefile =~ s/^PERL_ARCHLIB = .+/PERL_ARCHLIB =/m; + + # Perl 5.005 mentions PERL_LIB explicitly, so we have to remove that as well. + $makefile =~ s/(\"?)-I\$\(PERL_LIB\)\1//g; + + # XXX - This is currently unused; not sure if it breaks other MM-users + # $makefile =~ s/^pm_to_blib\s+:\s+/pm_to_blib :: /mg; + + open MAKEFILE, "> $makefile_name" or die "fix_up_makefile: Couldn't open $makefile_name: $!"; + print MAKEFILE "$preamble$makefile$postamble" or die $!; + close MAKEFILE or die $!; + + 1; +} + +sub preamble { + my ($self, $text) = @_; + $self->{preamble} = $text . $self->{preamble} if defined $text; + $self->{preamble}; +} + +sub postamble { + my ($self, $text) = @_; + $self->{postamble} ||= $self->admin->postamble; + $self->{postamble} .= $text if defined $text; + $self->{postamble} +} + +1; + +__END__ + +#line 394 diff --git a/inc/Module/Install/Metadata.pm b/inc/Module/Install/Metadata.pm new file mode 100644 index 0000000..653193d --- /dev/null +++ b/inc/Module/Install/Metadata.pm @@ -0,0 +1,624 @@ +#line 1 +package Module::Install::Metadata; + +use strict 'vars'; +use Module::Install::Base (); + +use vars qw{$VERSION @ISA $ISCORE}; +BEGIN { + $VERSION = '0.91'; + @ISA = 'Module::Install::Base'; + $ISCORE = 1; +} + +my @boolean_keys = qw{ + sign +}; + +my @scalar_keys = qw{ + name + module_name + abstract + author + version + distribution_type + tests + installdirs +}; + +my @tuple_keys = qw{ + configure_requires + build_requires + requires + recommends + bundles + resources +}; + +my @resource_keys = qw{ + homepage + bugtracker + repository +}; + +my @array_keys = qw{ + keywords +}; + +sub Meta { shift } +sub Meta_BooleanKeys { @boolean_keys } +sub Meta_ScalarKeys { @scalar_keys } +sub Meta_TupleKeys { @tuple_keys } +sub Meta_ResourceKeys { @resource_keys } +sub Meta_ArrayKeys { @array_keys } + +foreach my $key ( @boolean_keys ) { + *$key = sub { + my $self = shift; + if ( defined wantarray and not @_ ) { + return $self->{values}->{$key}; + } + $self->{values}->{$key} = ( @_ ? $_[0] : 1 ); + return $self; + }; +} + +foreach my $key ( @scalar_keys ) { + *$key = sub { + my $self = shift; + return $self->{values}->{$key} if defined wantarray and !@_; + $self->{values}->{$key} = shift; + return $self; + }; +} + +foreach my $key ( @array_keys ) { + *$key = sub { + my $self = shift; + return $self->{values}->{$key} if defined wantarray and !@_; + $self->{values}->{$key} ||= []; + push @{$self->{values}->{$key}}, @_; + return $self; + }; +} + +foreach my $key ( @resource_keys ) { + *$key = sub { + my $self = shift; + unless ( @_ ) { + return () unless $self->{values}->{resources}; + return map { $_->[1] } + grep { $_->[0] eq $key } + @{ $self->{values}->{resources} }; + } + return $self->{values}->{resources}->{$key} unless @_; + my $uri = shift or die( + "Did not provide a value to $key()" + ); + $self->resources( $key => $uri ); + return 1; + }; +} + +foreach my $key ( grep { $_ ne "resources" } @tuple_keys) { + *$key = sub { + my $self = shift; + return $self->{values}->{$key} unless @_; + my @added; + while ( @_ ) { + my $module = shift or last; + my $version = shift || 0; + push @added, [ $module, $version ]; + } + push @{ $self->{values}->{$key} }, @added; + return map {@$_} @added; + }; +} + +# Resource handling +my %lc_resource = map { $_ => 1 } qw{ + homepage + license + bugtracker + repository +}; + +sub resources { + my $self = shift; + while ( @_ ) { + my $name = shift or last; + my $value = shift or next; + if ( $name eq lc $name and ! $lc_resource{$name} ) { + die("Unsupported reserved lowercase resource '$name'"); + } + $self->{values}->{resources} ||= []; + push @{ $self->{values}->{resources} }, [ $name, $value ]; + } + $self->{values}->{resources}; +} + +# Aliases for build_requires that will have alternative +# meanings in some future version of META.yml. +sub test_requires { shift->build_requires(@_) } +sub install_requires { shift->build_requires(@_) } + +# Aliases for installdirs options +sub install_as_core { $_[0]->installdirs('perl') } +sub install_as_cpan { $_[0]->installdirs('site') } +sub install_as_site { $_[0]->installdirs('site') } +sub install_as_vendor { $_[0]->installdirs('vendor') } + +sub dynamic_config { + my $self = shift; + unless ( @_ ) { + warn "You MUST provide an explicit true/false value to dynamic_config\n"; + return $self; + } + $self->{values}->{dynamic_config} = $_[0] ? 1 : 0; + return 1; +} + +sub perl_version { + my $self = shift; + return $self->{values}->{perl_version} unless @_; + my $version = shift or die( + "Did not provide a value to perl_version()" + ); + + # Normalize the version + $version = $self->_perl_version($version); + + # We don't support the reall old versions + unless ( $version >= 5.005 ) { + die "Module::Install only supports 5.005 or newer (use ExtUtils::MakeMaker)\n"; + } + + $self->{values}->{perl_version} = $version; +} + +#Stolen from M::B +my %license_urls = ( + perl => 'http://dev.perl.org/licenses/', + apache => 'http://apache.org/licenses/LICENSE-2.0', + artistic => 'http://opensource.org/licenses/artistic-license.php', + artistic_2 => 'http://opensource.org/licenses/artistic-license-2.0.php', + lgpl => 'http://opensource.org/licenses/lgpl-license.php', + lgpl2 => 'http://opensource.org/licenses/lgpl-2.1.php', + lgpl3 => 'http://opensource.org/licenses/lgpl-3.0.html', + bsd => 'http://opensource.org/licenses/bsd-license.php', + gpl => 'http://opensource.org/licenses/gpl-license.php', + gpl2 => 'http://opensource.org/licenses/gpl-2.0.php', + gpl3 => 'http://opensource.org/licenses/gpl-3.0.html', + mit => 'http://opensource.org/licenses/mit-license.php', + mozilla => 'http://opensource.org/licenses/mozilla1.1.php', + open_source => undef, + unrestricted => undef, + restrictive => undef, + unknown => undef, +); + +sub license { + my $self = shift; + return $self->{values}->{license} unless @_; + my $license = shift or die( + 'Did not provide a value to license()' + ); + $self->{values}->{license} = $license; + + # Automatically fill in license URLs + if ( $license_urls{$license} ) { + $self->resources( license => $license_urls{$license} ); + } + + return 1; +} + +sub all_from { + my ( $self, $file ) = @_; + + unless ( defined($file) ) { + my $name = $self->name or die( + "all_from called with no args without setting name() first" + ); + $file = join('/', 'lib', split(/-/, $name)) . '.pm'; + $file =~ s{.*/}{} unless -e $file; + unless ( -e $file ) { + die("all_from cannot find $file from $name"); + } + } + unless ( -f $file ) { + die("The path '$file' does not exist, or is not a file"); + } + + # Some methods pull from POD instead of code. + # If there is a matching .pod, use that instead + my $pod = $file; + $pod =~ s/\.pm$/.pod/i; + $pod = $file unless -e $pod; + + # Pull the different values + $self->name_from($file) unless $self->name; + $self->version_from($file) unless $self->version; + $self->perl_version_from($file) unless $self->perl_version; + $self->author_from($pod) unless $self->author; + $self->license_from($pod) unless $self->license; + $self->abstract_from($pod) unless $self->abstract; + + return 1; +} + +sub provides { + my $self = shift; + my $provides = ( $self->{values}->{provides} ||= {} ); + %$provides = (%$provides, @_) if @_; + return $provides; +} + +sub auto_provides { + my $self = shift; + return $self unless $self->is_admin; + unless (-e 'MANIFEST') { + warn "Cannot deduce auto_provides without a MANIFEST, skipping\n"; + return $self; + } + # Avoid spurious warnings as we are not checking manifest here. + local $SIG{__WARN__} = sub {1}; + require ExtUtils::Manifest; + local *ExtUtils::Manifest::manicheck = sub { return }; + + require Module::Build; + my $build = Module::Build->new( + dist_name => $self->name, + dist_version => $self->version, + license => $self->license, + ); + $self->provides( %{ $build->find_dist_packages || {} } ); +} + +sub feature { + my $self = shift; + my $name = shift; + my $features = ( $self->{values}->{features} ||= [] ); + my $mods; + + if ( @_ == 1 and ref( $_[0] ) ) { + # The user used ->feature like ->features by passing in the second + # argument as a reference. Accomodate for that. + $mods = $_[0]; + } else { + $mods = \@_; + } + + my $count = 0; + push @$features, ( + $name => [ + map { + ref($_) ? ( ref($_) eq 'HASH' ) ? %$_ : @$_ : $_ + } @$mods + ] + ); + + return @$features; +} + +sub features { + my $self = shift; + while ( my ( $name, $mods ) = splice( @_, 0, 2 ) ) { + $self->feature( $name, @$mods ); + } + return $self->{values}->{features} + ? @{ $self->{values}->{features} } + : (); +} + +sub no_index { + my $self = shift; + my $type = shift; + push @{ $self->{values}->{no_index}->{$type} }, @_ if $type; + return $self->{values}->{no_index}; +} + +sub read { + my $self = shift; + $self->include_deps( 'YAML::Tiny', 0 ); + + require YAML::Tiny; + my $data = YAML::Tiny::LoadFile('META.yml'); + + # Call methods explicitly in case user has already set some values. + while ( my ( $key, $value ) = each %$data ) { + next unless $self->can($key); + if ( ref $value eq 'HASH' ) { + while ( my ( $module, $version ) = each %$value ) { + $self->can($key)->($self, $module => $version ); + } + } else { + $self->can($key)->($self, $value); + } + } + return $self; +} + +sub write { + my $self = shift; + return $self unless $self->is_admin; + $self->admin->write_meta; + return $self; +} + +sub version_from { + require ExtUtils::MM_Unix; + my ( $self, $file ) = @_; + $self->version( ExtUtils::MM_Unix->parse_version($file) ); +} + +sub abstract_from { + require ExtUtils::MM_Unix; + my ( $self, $file ) = @_; + $self->abstract( + bless( + { DISTNAME => $self->name }, + 'ExtUtils::MM_Unix' + )->parse_abstract($file) + ); +} + +# Add both distribution and module name +sub name_from { + my ($self, $file) = @_; + if ( + Module::Install::_read($file) =~ m/ + ^ \s* + package \s* + ([\w:]+) + \s* ; + /ixms + ) { + my ($name, $module_name) = ($1, $1); + $name =~ s{::}{-}g; + $self->name($name); + unless ( $self->module_name ) { + $self->module_name($module_name); + } + } else { + die("Cannot determine name from $file\n"); + } +} + +sub perl_version_from { + my $self = shift; + if ( + Module::Install::_read($_[0]) =~ m/ + ^ + (?:use|require) \s* + v? + ([\d_\.]+) + \s* ; + /ixms + ) { + my $perl_version = $1; + $perl_version =~ s{_}{}g; + $self->perl_version($perl_version); + } else { + warn "Cannot determine perl version info from $_[0]\n"; + return; + } +} + +sub author_from { + my $self = shift; + my $content = Module::Install::_read($_[0]); + if ($content =~ m/ + =head \d \s+ (?:authors?)\b \s* + ([^\n]*) + | + =head \d \s+ (?:licen[cs]e|licensing|copyright|legal)\b \s* + .*? copyright .*? \d\d\d[\d.]+ \s* (?:\bby\b)? \s* + ([^\n]*) + /ixms) { + my $author = $1 || $2; + $author =~ s{E}{<}g; + $author =~ s{E}{>}g; + $self->author($author); + } else { + warn "Cannot determine author info from $_[0]\n"; + } +} + +sub license_from { + my $self = shift; + if ( + Module::Install::_read($_[0]) =~ m/ + ( + =head \d \s+ + (?:licen[cs]e|licensing|copyright|legal)\b + .*? + ) + (=head\\d.*|=cut.*|) + \z + /ixms ) { + my $license_text = $1; + my @phrases = ( + 'under the same (?:terms|license) as (?:perl|the perl programming language) itself' => 'perl', 1, + 'GNU general public license' => 'gpl', 1, + 'GNU public license' => 'gpl', 1, + 'GNU lesser general public license' => 'lgpl', 1, + 'GNU lesser public license' => 'lgpl', 1, + 'GNU library general public license' => 'lgpl', 1, + 'GNU library public license' => 'lgpl', 1, + 'BSD license' => 'bsd', 1, + 'Artistic license' => 'artistic', 1, + 'GPL' => 'gpl', 1, + 'LGPL' => 'lgpl', 1, + 'BSD' => 'bsd', 1, + 'Artistic' => 'artistic', 1, + 'MIT' => 'mit', 1, + 'proprietary' => 'proprietary', 0, + ); + while ( my ($pattern, $license, $osi) = splice(@phrases, 0, 3) ) { + $pattern =~ s{\s+}{\\s+}g; + if ( $license_text =~ /\b$pattern\b/i ) { + $self->license($license); + return 1; + } + } + } + + warn "Cannot determine license info from $_[0]\n"; + return 'unknown'; +} + +sub _extract_bugtracker { + my @links = $_[0] =~ m#L<(\Qhttp://rt.cpan.org/\E[^>]+)>#g; + my %links; + @links{@links}=(); + @links=keys %links; + return @links; +} + +sub bugtracker_from { + my $self = shift; + my $content = Module::Install::_read($_[0]); + my @links = _extract_bugtracker($content); + unless ( @links ) { + warn "Cannot determine bugtracker info from $_[0]\n"; + return 0; + } + if ( @links > 1 ) { + warn "Found more than on rt.cpan.org link in $_[0]\n"; + return 0; + } + + # Set the bugtracker + bugtracker( $links[0] ); + return 1; +} + +sub requires_from { + my $self = shift; + my $content = Module::Install::_readperl($_[0]); + my @requires = $content =~ m/^use\s+([^\W\d]\w*(?:::\w+)*)\s+([\d\.]+)/mg; + while ( @requires ) { + my $module = shift @requires; + my $version = shift @requires; + $self->requires( $module => $version ); + } +} + +sub test_requires_from { + my $self = shift; + my $content = Module::Install::_readperl($_[0]); + my @requires = $content =~ m/^use\s+([^\W\d]\w*(?:::\w+)*)\s+([\d\.]+)/mg; + while ( @requires ) { + my $module = shift @requires; + my $version = shift @requires; + $self->test_requires( $module => $version ); + } +} + +# Convert triple-part versions (eg, 5.6.1 or 5.8.9) to +# numbers (eg, 5.006001 or 5.008009). +# Also, convert double-part versions (eg, 5.8) +sub _perl_version { + my $v = $_[-1]; + $v =~ s/^([1-9])\.([1-9]\d?\d?)$/sprintf("%d.%03d",$1,$2)/e; + $v =~ s/^([1-9])\.([1-9]\d?\d?)\.(0|[1-9]\d?\d?)$/sprintf("%d.%03d%03d",$1,$2,$3 || 0)/e; + $v =~ s/(\.\d\d\d)000$/$1/; + $v =~ s/_.+$//; + if ( ref($v) ) { + # Numify + $v = $v + 0; + } + return $v; +} + + + + + +###################################################################### +# MYMETA Support + +sub WriteMyMeta { + die "WriteMyMeta has been deprecated"; +} + +sub write_mymeta_yaml { + my $self = shift; + + # We need YAML::Tiny to write the MYMETA.yml file + unless ( eval { require YAML::Tiny; 1; } ) { + return 1; + } + + # Generate the data + my $meta = $self->_write_mymeta_data or return 1; + + # Save as the MYMETA.yml file + print "Writing MYMETA.yml\n"; + YAML::Tiny::DumpFile('MYMETA.yml', $meta); +} + +sub write_mymeta_json { + my $self = shift; + + # We need JSON to write the MYMETA.json file + unless ( eval { require JSON; 1; } ) { + return 1; + } + + # Generate the data + my $meta = $self->_write_mymeta_data or return 1; + + # Save as the MYMETA.yml file + print "Writing MYMETA.json\n"; + Module::Install::_write( + 'MYMETA.json', + JSON->new->pretty(1)->canonical->encode($meta), + ); +} + +sub _write_mymeta_data { + my $self = shift; + + # If there's no existing META.yml there is nothing we can do + return undef unless -f 'META.yml'; + + # We need Parse::CPAN::Meta to load the file + unless ( eval { require Parse::CPAN::Meta; 1; } ) { + return undef; + } + + # Merge the perl version into the dependencies + my $val = $self->Meta->{values}; + my $perl = delete $val->{perl_version}; + if ( $perl ) { + $val->{requires} ||= []; + my $requires = $val->{requires}; + + # Canonize to three-dot version after Perl 5.6 + if ( $perl >= 5.006 ) { + $perl =~ s{^(\d+)\.(\d\d\d)(\d*)}{join('.', $1, int($2||0), int($3||0))}e + } + unshift @$requires, [ perl => $perl ]; + } + + # Load the advisory META.yml file + my @yaml = Parse::CPAN::Meta::LoadFile('META.yml'); + my $meta = $yaml[0]; + + # Overwrite the non-configure dependency hashs + delete $meta->{requires}; + delete $meta->{build_requires}; + delete $meta->{recommends}; + if ( exists $val->{requires} ) { + $meta->{requires} = { map { @$_ } @{ $val->{requires} } }; + } + if ( exists $val->{build_requires} ) { + $meta->{build_requires} = { map { @$_ } @{ $val->{build_requires} } }; + } + + return $meta; +} + +1; diff --git a/inc/Module/Install/Win32.pm b/inc/Module/Install/Win32.pm new file mode 100644 index 0000000..f2f99df --- /dev/null +++ b/inc/Module/Install/Win32.pm @@ -0,0 +1,64 @@ +#line 1 +package Module::Install::Win32; + +use strict; +use Module::Install::Base (); + +use vars qw{$VERSION @ISA $ISCORE}; +BEGIN { + $VERSION = '0.91'; + @ISA = 'Module::Install::Base'; + $ISCORE = 1; +} + +# determine if the user needs nmake, and download it if needed +sub check_nmake { + my $self = shift; + $self->load('can_run'); + $self->load('get_file'); + + require Config; + return unless ( + $^O eq 'MSWin32' and + $Config::Config{make} and + $Config::Config{make} =~ /^nmake\b/i and + ! $self->can_run('nmake') + ); + + print "The required 'nmake' executable not found, fetching it...\n"; + + require File::Basename; + my $rv = $self->get_file( + url => 'http://download.microsoft.com/download/vc15/Patch/1.52/W95/EN-US/Nmake15.exe', + ftp_url => 'ftp://ftp.microsoft.com/Softlib/MSLFILES/Nmake15.exe', + local_dir => File::Basename::dirname($^X), + size => 51928, + run => 'Nmake15.exe /o > nul', + check_for => 'Nmake.exe', + remove => 1, + ); + + die <<'END_MESSAGE' unless $rv; + +------------------------------------------------------------------------------- + +Since you are using Microsoft Windows, you will need the 'nmake' utility +before installation. It's available at: + + http://download.microsoft.com/download/vc15/Patch/1.52/W95/EN-US/Nmake15.exe + or + ftp://ftp.microsoft.com/Softlib/MSLFILES/Nmake15.exe + +Please download the file manually, save it to a directory in %PATH% (e.g. +C:\WINDOWS\COMMAND\), then launch the MS-DOS command line shell, "cd" to +that directory, and run "Nmake15.exe" from there; that will create the +'nmake.exe' file needed by this module. + +You may then resume the installation process described in README. + +------------------------------------------------------------------------------- +END_MESSAGE + +} + +1; diff --git a/inc/Module/Install/WriteAll.pm b/inc/Module/Install/WriteAll.pm new file mode 100644 index 0000000..12471e5 --- /dev/null +++ b/inc/Module/Install/WriteAll.pm @@ -0,0 +1,60 @@ +#line 1 +package Module::Install::WriteAll; + +use strict; +use Module::Install::Base (); + +use vars qw{$VERSION @ISA $ISCORE}; +BEGIN { + $VERSION = '0.91';; + @ISA = qw{Module::Install::Base}; + $ISCORE = 1; +} + +sub WriteAll { + my $self = shift; + my %args = ( + meta => 1, + sign => 0, + inline => 0, + check_nmake => 1, + @_, + ); + + $self->sign(1) if $args{sign}; + $self->admin->WriteAll(%args) if $self->is_admin; + + $self->check_nmake if $args{check_nmake}; + unless ( $self->makemaker_args->{PL_FILES} ) { + $self->makemaker_args( PL_FILES => {} ); + } + + # Until ExtUtils::MakeMaker support MYMETA.yml, make sure + # we clean it up properly ourself. + $self->realclean_files('MYMETA.yml'); + + if ( $args{inline} ) { + $self->Inline->write; + } else { + $self->Makefile->write; + } + + # The Makefile write process adds a couple of dependencies, + # so write the META.yml files after the Makefile. + if ( $args{meta} ) { + $self->Meta->write; + } + + # Experimental support for MYMETA + if ( $ENV{X_MYMETA} ) { + if ( $ENV{X_MYMETA} eq 'JSON' ) { + $self->Meta->write_mymeta_json; + } else { + $self->Meta->write_mymeta_yaml; + } + } + + return 1; +} + +1; diff --git a/lib/WWW/Google/SiteMap.pm b/lib/WWW/Google/SiteMap.pm new file mode 100644 index 0000000..9eaf410 --- /dev/null +++ b/lib/WWW/Google/SiteMap.pm @@ -0,0 +1,354 @@ +package WWW::Google::SiteMap; +use vars qw($VERSION); $VERSION = '1.10'; + +=head1 NAME + +WWW::Google::SiteMap - DEPRECATED - See Search::Sitemap + +=head1 DEPRECATED + +Now that more search engines than just Google are supporting the Sitemap +protocol, the WWW::Google::SiteMap module has been renamed to +L. + +=head1 SYNOPSIS + + use WWW::Google::SiteMap; + + my $map = WWW::Google::SiteMap->new(file => 'sitemap.gz'); + + # Main page, changes a lot because of the blog + $map->add(WWW::Google::SiteMap::URL->new( + loc => 'http://www.jasonkohles.com/', + lastmod => '2005-06-03', + changefreq => 'daily', + priority => 1.0, + )); + + # Top level directories, don't change as much, and have a lower priority + $map->add({ + loc => "http://www.jasonkohles.com/$_/", + changefreq => 'weekly', + priority => 0.9, # lower priority than the home page + }) for qw( + software gpg hamradio photos scuba snippets tools + ); + + $map->write; + +=head1 DESCRIPTION + +The Sitemap Protocol allows you to inform search engine crawlers about URLs +on your Web sites that are available for crawling. A Sitemap consists of a +list of URLs and may also contain additional information about those URLs, +such as when they were last modified, how frequently they change, etc. + +This module allows you to create and modify sitemaps. + +=cut + +use strict; +use warnings; +use WWW::Google::SiteMap::URL qw(); +use XML::Twig qw(); +unless($IO::Zlib::VERSION) { eval "use IO::Zlib ()"; } +my $ZLIB = $IO::Zlib::VERSION; +use IO::File qw(); +require UNIVERSAL; +use Carp qw(carp croak); +use HTML::Entities qw(decode_entities); + +=head1 METHODS + +=over 4 + +=item new() + +Creates a new WWW::Google::SiteMap object. + + my $map = WWW::Google::SiteMap->new( + file => 'sitemap.gz', + ); + +=cut + +sub new { + my $class = shift; + my %opts = @_; + my $self = bless({}, ref($class) || $class); + while(my($key,$value) = each %opts) { $self->$key($value) } + if($self->file && -e $self->file) { $self->read } + return $self; +} + +=item read() + +Read a sitemap in to this object. If a filename is specified, it will be +read from that file, otherwise it will be read from the file that was +specified with the file() method. Reading of compressed files is done +automatically if the filename ends with .gz. + +=cut + +sub read { + my $self = shift; + my $file = shift || $self->file || + croak "No filename specified for ".(ref($self)||$self)."::read"; + + # don't try to parse missing or empty files + # no errors for this, because we might be creating it + return unless -f $file && -s $file; + + # don't try to parse very small compressed files + # (empty .gz files are 20 bytes) + return if $file =~ /\.gz/ && -s $file < 50; + + my $fh; + if($file =~ /\.gz$/i) { + croak "IO::Zlib not available, cannot read compressed sitemaps" + unless $ZLIB; + $fh = IO::Zlib->new($file,"rb"); + } else { + $fh = IO::File->new($file,"r"); + } + my @urls = (); + my $urlparser = sub { + my $self = shift; + my $elt = shift; + + my $url = WWW::Google::SiteMap::URL->new(); + foreach my $c ($elt->children) { + my $var = $c->gi; + if($var eq 'loc') { + $url->$var(decode_entities($c->text)); + } else { + $url->$var($c->text); + } + } + $self->purge; + push(@urls,$url); + }; + my $twig = XML::Twig->new( + twig_roots => { + 'urlset/url' => $urlparser, + 'sitemapindex/sitemap' => $urlparser, + }, + ); + $twig->safe_parse(join('',$fh->getlines)) + or die "Could not parse $file ($@)"; + $self->urls(@urls); +} + +=item write([$file]) + +Write the sitemap out to the file. If a filename is specified, it will be +written to that file, otherwise it will be written to the file that was +specified with the file() method. Writing of compressed files is done +automatically if the filename ends with .gz. + +=cut + +sub write { + my $self = shift; + my $file = shift || $self->file || + croak "No filename specified for ".(ref($self)||$self)."::write"; + + my $fh; + if($file =~ /\.gz$/i) { + croak "IO::Zlib not available, cannot write compressed sitemaps" + unless $ZLIB; + $fh = IO::Zlib->new($file,"wb9"); + } else { + $fh = IO::File->new($file,"w"); + } + croak "Could not create '$file'" unless $fh; + $fh->print($self->xml); +} + +=item urls() + +Return the L objects that make up the sitemap. + +=cut + +sub urls { + my $self = shift; + $self->{urls} = \@_ if @_; + my @urls = grep { ref($_) && defined $_->loc } @{$self->{urls}}; + return wantarray ? @urls : \@urls; +} + +=item add($item,[$item...]) + +Add the L items listed to the sitemap. + +If you pass hashrefs instead of L objects, it +will turn them into objects for you. If the first item you pass is a +simple scalar that matches \w, it will assume that the values passed are +a hash for a single object. If the first item passed matches m{^\w+://} +(i.e. it looks like a URL) then all the arguments will be treated as URLs, +and L objects will be constructed for them, but only +the loc field will be populated. + +This means you can do any of these: + + # create the WWW::Google::SiteMap::URL object yourself + my $url = WWW::Google::SiteMap::URL->new( + loc => 'http://www.jasonkohles.com/', + priority => 1.0, + ); + $map->add($url); + + # or + $map->add( + { loc => 'http://www.jasonkohles.com/' }, + { loc => 'http://www.jasonkohles.com/software/google-sitemap/' }, + { loc => 'http://www.jasonkohles.com/software/geo-shapefile/' }, + ); + + # or + $map->add( + loc => 'http://www.jasonkohles.com/', + priority => 1.0, + ); + + # or even something funkier + $map->add(qw( + http://www.jasonkohles.com/ + http://www.jasonkohles.com/software/www-google-sitemap/ + http://www.jasonkohles.com/software/geo-shapefile/ + http://www.jasonkohles.com/software/text-fakedata/ + )); + foreach my $url ($map->urls) { $url->changefreq('daily') } + +=cut + +sub add { + my $self = shift; + if(ref($_[0])) { + if(UNIVERSAL::isa($_[0],"WWW::Google::SiteMap::URL")) { + push(@{$self->{urls}}, @_); + } elsif(ref($_[0]) =~ /HASH/) { + push(@{$self->{urls}},map { + WWW::Google::SiteMap::URL->new($_) + } @_); + } + } elsif($_[0] =~ /^\w+$/) { + push(@{$self->{urls}}, WWW::Google::SiteMap::URL->new(@_)); + } elsif($_[0] =~ m{^\w+://}) { + push(@{$self->{urls}}, map { + WWW::Google::SiteMap::URL->new(loc => $_) + } @_); + } else { + croak "Can't turn '".( + ref($_[0]) || $_[0] + )."' into WWW::Google::SiteMap::URL object"; + } +} + +=item xml(); + +Return the xml representation of the sitemap. + +=cut + +sub xml { + my $self = shift; + + my $xml = XML::Twig::Elt->new('urlset', { + 'xmlns' => 'http://www.google.com/schemas/sitemap/0.84', + 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance', + 'xsi:schemaLocation' => join(' ', + 'http://www.google.com/schemas/sitemap/0.84', + 'http://www.google.com/schemas/sitemap/0.84/sitemap.xsd', + ), + }); + foreach($self->urls) { + $_->as_elt->paste(last_child => $xml); + } + $xml->set_pretty_print($self->pretty); + my $header = ''; + if($self->pretty) { $header .= "\n" } + return $header.$xml->sprint(); +} + +=item file() + +Get or set the filename associated with this object. If you call read() or +write() without a filename, this is the default. + +=cut + +sub file { + my $self = shift; + $self->{file} = shift if @_; + return $self->{file}; +} + +=item pretty() + +Set this to a true value to enable 'pretty-printing' on the XML output. If +false (the default) the XML will be more compact but not as easily readable +for humans (Google and other computers won't care what you set this to). + +If you set this to a 'word' (something that matches /[a-z]/i), then that +value will be passed to XML::Twig directly (see the L pretty_print +documentation). Otherwise if a true value is passed, it means 'nice', and a +false value means 'none'. + +Returns the value it was set to, or the current value if called with no +arguments. + +=cut + +sub pretty { + my $self = shift; + my $val = shift || return $self->{pretty} || 'none'; + + if($val =~ /[a-z]/i) { + $self->{pretty} = $val; + } elsif($val) { + $self->{pretty} = 'nice'; + } else { + $self->{pretty} = 'none'; + } + return $self->{pretty}; +} + +=back + +=head1 MODULE HOME PAGE + +The home page of this module is +L. This is where you +can always find the latest version, development versions, and bug reports. You +will also find a link there to report bugs. + +=head1 SEE ALSO + +L + +L + +L + +L + +L + +=head1 AUTHOR + +Jason Kohles, Eemail@jasonkohles.comE + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2005 by Jason Kohles + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.8.4 or, +at your option, any later version of Perl 5 you may have available. + +=cut + +1; +__END__ diff --git a/lib/WWW/Google/SiteMap/Index.pm b/lib/WWW/Google/SiteMap/Index.pm new file mode 100644 index 0000000..64afb5d --- /dev/null +++ b/lib/WWW/Google/SiteMap/Index.pm @@ -0,0 +1,189 @@ +package WWW::Google::SiteMap::Index; +use vars qw($VERSION); $VERSION = '1.10'; + +=head1 NAME + +WWW::Google::SiteMap::Index - DEPRECATED - See Search::Sitemap + +=head1 DEPRECATED + +Now that more search engines than just Google are supporting the Sitemap +protocol, the WWW::Google::SiteMap module has been renamed to +L. + +=head1 SYNOPSIS + + use WWW::Google::SiteMap::Index; + + my $index = WWW::Google::SiteMap::Index->new( + file => 'sitemap-index.gz', + ); + + $index->add(WWW::Google::SiteMap::URL->new( + loc => 'http://www.jasonkohles.com/sitemap1.gz', + lastmod => '2005-11-01', + )); + +=head1 DESCRIPTION + +A sitemap index is used to point Google at your sitemaps if you have more +than one of them. + +=cut + +use strict; +use warnings; +use base 'WWW::Google::SiteMap'; + +=head1 METHODS + +=over 4 + +=item new() + +Creates a new WWW::Google::SiteMap::Index object. + + my $index = WWW::Google::SiteMap::Index->new( + file => 'sitemap-index.gz', + ); + +=item read() + +Read a sitemap index in to this object. If a filename is specified, it will +be read from that file, otherwise it will be read from the file that was +specified with the file() method. Reading of compressed files is done +automatically if the filename ends with .gz. + +=item write([$file]); + +Write the sitemap index out to the file. If a filename is specified, it will +be written to that file, otherwise it will be written to the file that was +specified with the file() method. Writing of compressed files is done +automatically if the filename ends with .gz + +=item urls() + +Return the L objects that make up the sitemap index. + +=item add($item,[$item...]); + +Add the L items listed to the sitemap index. + +If you pass hashrefs instead of L objects, it +will turn them into objects for you. If the first item you pass is a +simple scalar that matches \w, it will assume that the values passed are +a hash for a single object. If the first item passed matches m{^\w+://} +(i.e. it looks like a URL) then all the arguments will be treated as URLs, +and L objects will be constructed for them, but only +the loc field will be populated. + +This means you can do any of these: + + # create the WWW::Google::SiteMap::URL object yourself + my $url = WWW::Google::SiteMap::URL->new( + loc => 'http://www.jasonkohles.com/sitemap1.gz', + ); + $map->add($url); + + # or + $map->add( + { loc => 'http://www.jasonkohles.com/sitemap1.gz' }, + { loc => 'http://www.jasonkohles.com/sitemap2.gz' }, + { loc => 'http://www.jasonkohles.com/sitemap3.gz' }, + ); + + # or + $map->add( + loc => 'http://www.jasonkohles.com/sitemap1.gz', + priority => 1.0, + ); + + # or even something funkier + $map->add(qw( + http://www.jasonkohles.com/ + http://www.jasonkohles.com/software/www-google-sitemap/ + http://www.jasonkohles.com/software/geo-shapefile/ + http://www.jasonkohles.com/software/text-fakedata/ + )); + foreach my $url ($map->urls) { $url->lastmod('2005-11-01') } + +=item xml(); + +Return the xml representation of the sitemap index. + +=cut + +sub xml { + my $self = shift; + + my $xml = XML::Twig::Elt->new('sitemapindex', { + 'xmlns' => 'http://www.google.com/schemas/sitemap/0.84', + 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance', + 'xsi:schemaLocation' => join(' ', + 'http://www.google.com/schemas/sitemap/0.84', + 'http://www.google.com/schemas/sitemap/0.84/siteindex.xsd', + ), + }); + foreach($self->urls) { + $_->as_elt('sitemap',qw(loc lastmod))->paste(last_child => $xml); + } + $xml->set_pretty_print($self->pretty); + my $header = ''; + if($self->pretty) { $header .= "\n" } + return $header.$xml->sprint(); +} + +=item file(); + +Get or set the filename associated with this object. If you call read() or +write() without a filename, this is the default. + +=item pretty() + +Set this to a true value to enable 'pretty-printing' on the XML output. If +false (the default) the XML will be more compact but not as easily readable +for humans (Google and other computers won't care what you set this to). + +If you set this to a 'word' (something that matches /[a-z]/i), then that +value will be passed to XML::Twig directly (see the L pretty_print +documentation). Otherwise if a true value is passed, it means 'nice', and a +false value means 'none'. + +Returns the value it was set to, or the current value if called with no +arguments. + +=back + +=head1 MODULE HOME PAGE + +The home page of this module is +L. This is where you +can always find the latest version, development versions, and bug reports. You +will also find a link there to report bugs. + +=head1 SEE ALSO + +L + +L + +L + +L + +=head1 AUTHOR + +Jason Kohles, Eemail@jasonkohles.comE + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2005 by Jason Kohles + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.8.4 or, +at your option, any later version of Perl 5 you may have available. + +=cut + +1; +__END__ diff --git a/lib/WWW/Google/SiteMap/Ping.pm b/lib/WWW/Google/SiteMap/Ping.pm new file mode 100644 index 0000000..fd0ab71 --- /dev/null +++ b/lib/WWW/Google/SiteMap/Ping.pm @@ -0,0 +1,197 @@ +package WWW::Google::SiteMap::Ping; +use vars qw($VERSION); $VERSION = '1.10'; + +=head1 NAME + +WWW::Google::SiteMap::Ping - DEPRECATED - See Search::Sitemap + +=head1 DEPRECATED + +Now that more search engines than just Google are supporting the Sitemap +protocol, the WWW::Google::SiteMap module has been renamed to +L. + +=head1 SYNOPSIS + + use WWW::Google::SiteMap::Ping; + + my $ping = WWW::Google::SiteMap::Ping->new( + 'http://www.jasonkohles.com/sitemap.gz', + ); + + $ping->submit; + print "These pings succeeded:\n"; + foreach($ping->success) { + print "$_: ".$ping->status($_)."\n"; + } + print "These pings failed:\n"; + foreach($ping->failure) { + print "$_: ".$ping->status($_)."\n"; + } + +=head1 DESCRIPTION + +This module makes it easy to notify Google that your sitemaps, or sitemap +indexes, have been updated. See L and +L for tools to help you create sitemaps and +indexes. + +=cut + +use strict; +use warnings; +use LWP::UserAgent; +use URI::Escape qw(uri_escape); + +=head1 METHODS + +=over 4 + +=item new(); + +Create a new WWW::Google::SiteMap::Ping object. Can be given a list of +URLs which refer to sitemaps or sitemap indexes, these URLs will simply +be passed to url(). + +=cut + +sub new { + my $class = shift; + my $self = bless({}, ref($class) || $class); + $self->{urls} = {}; + $self->add_urls(@_); + return $self; +} + +=item add_urls(@urls); + +Add one or more urls to the list of URLs to submit to Google. + +=cut + +sub add_urls { + my $self = shift; + foreach(@_) { + $self->{urls}->{$_} ||= 'PENDING'; + } +} + +=item urls(); + +Return the list of urls that will be (or were) submitted to google. + +=cut + +sub urls { return keys %{shift()->{urls}}; } + +=item submit + +Submit the urls to Google, returns the number of successful submissions. This +module uses L for the web-based submissions, and will honor +proxy settings in the environment. See L for more information. + +=cut + +sub submit { + my $self = shift; + + my $ua = $self->user_agent(); + my $success = 0; + foreach my $url ($self->urls) { + my $ping = "http://www.google.com/webmasters/sitemaps/ping?". + "sitemap=".uri_escape($url); + my $response = $ua->get($ping); + if($response->is_success) { + $self->{urls}->{$url} = 'SUCCESS'; + $success++; + } else { + $self->{urls}->{$url} = $response->status_line; + } + } + + return $success; +} + +=item success(); + +Return the URLs that were successfully submitted. Note that success only +means that the request was successfully received by Google, it does not +mean your sitemap was found, loaded or parsed successfully. If you want +to know whether your sitemap was loaded or parsed successfully, you have +to go to L and check the status +there. + +=cut + +sub success { + my $self = shift; + + return grep { $self->{urls}->{$_} eq 'SUCCESS' } keys %{$self->{urls}}; +} + +=item failure(); + +Return the URLs that were not successfully submitted. + +=cut + +sub failure { + my $self = shift; + + return grep { $self->{urls}->{$_} ne 'SUCCESS' } keys %{$self->{urls}}; +} + +=item user_agent(); + +If called with no arguments, will return the current L object +which will be used to access the web-based submission. If called with an +arugment, you can set the user agent that will be used in case you need to +give it special arguments. It must be a L object. + +If you call submit without having provided a user agent, one will be created +for you that is a basic L object, which honors proxy settings +in the environment. + +=cut + +sub user_agent { + my $self = shift; + + if(@_) { $self->{_ua} = shift } + unless($self->{_ua}) { + $self->{_ua} = LWP::UserAgent->new(); + $self->{_ua}->env_proxy; + $self->{_ua}->timeout(10); + } + return $self->{_ua}; +} + +=back + +=head1 MODULE HOME PAGE + +The home page of this module is +L. This is where you +can always find the latest version, development versions, and bug reports. You +will also find a link there to report bugs. + +=head1 SEE ALSO + +L + +=head1 AUTHOR + +Jason Kohles, Eemail@jasonkohles.comE + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2005 by Jason Kohles + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.8.4 or, +at your option, any later version of Perl 5 you may have available. + +=cut + +1; +__END__ diff --git a/lib/WWW/Google/SiteMap/Robot.pm b/lib/WWW/Google/SiteMap/Robot.pm new file mode 100644 index 0000000..58af408 --- /dev/null +++ b/lib/WWW/Google/SiteMap/Robot.pm @@ -0,0 +1,582 @@ +package WWW::Google::SiteMap::Robot; +use vars qw($VERSION); $VERSION = '1.10'; + +=head1 NAME + +WWW::Google::SiteMap::Robot - DEPRECATED - See Search::Sitemap + +=head1 DEPRECATED + +Now that more search engines than just Google are supporting the Sitemap +protocol, the WWW::Google::SiteMap module has been renamed to +L. + +=head1 SYNOPSIS + + use WWW::Google::SiteMap::Robot; + + my $robot = WWW::Google::SiteMap::Robot->new( + domain => 'www.jasonkohles.com', + restrict => qr{^http://www.jasonkohles.com/}, + starting_url => ['/index.html','/google-me.html'], + delay => 1, # delay in minutes + sitemap_file => '/var/www/html/sitemap.gz', + sitemap_url => 'http://www.jasonkohles.com/sitemap.gz', + user_agent => 'MyOwnSpider/1.0', + ); + $robot->run(); + +=head1 DESCRIPTION + +This is a simple robot class which subclasses L to create a +web-crawling spider. By giving it the URL to your home page, it will crawl +all the pages it can find and create a sitemap for them. + +=cut + +use strict; +use warnings; +use WWW::Mechanize; +use WWW::RobotRules; +use Carp qw(croak); +use POSIX qw(strftime); +use WWW::Google::SiteMap; +use WWW::Google::SiteMap::Ping; + +=head1 METHODS + +=over 4 + +=item new(); + +Create a new WWW::Google::SiteMap::Robot object. + +=cut + +sub new { + my $class = shift; + my %args = @_; + + my $self = bless({},ref($class)||$class); + + croak "No domain specified" unless $args{domain}; + + # These items have other methods that depend on them, so they need to + # be called in this order: + foreach my $x (qw(domain status_storage)) { + $self->$x(delete($args{$x})); + } + + while(my($k,$v) = each %args) { $self->$k($v) } + + return $self; +} + +=item domain(); + +Get/Set the domain name of the server you want to spider. This is used both +to create the initial URLs to put in the TO-DO list, as well as to create a +built-in restriction that prevents the robot from leaving your site. + +Google doesn't allow a sitemap to refer to URL's that are outside the domain +that the sitemap was retrieved for, so there really isn't any benefit in +allowing the robot to cross multiple domains. If you really think you need +to do this, you probably really just want more than one robot. If you are +absolutely certain you want to cross domain boundaries, then you'll have to +subclass this module, and Google will probably reject your sitemaps. + +=cut + +sub domain { + my $self = shift; + + if(@_) { $self->{domain} = shift } + return $self->{domain}; +} + +=item restrict(); + +Get/Set the url restrictions. The restriction list can be any of the +following: + +=over 4 + +=item A list reference (or a list) + +A list reference is assumed to contain a list of any of the following types. +When passed as an argument to the constructor it has to be a reference, but +when you are calling restrict() as a method, you can pass it a list, and it +will turn it into a list reference. If you provide more than one restrict +item in a list, the first one to return true will cause the rest of them to +be skipped, so the URL will be restricted (skipped) if any of the items are +true (if you want more complexity than that, then just use a code reference +by itself, which can do whatever it wants.) + +=item A code reference + +If you give restrict a code reference, it will be passed the URL that is +about to be spidered, if the code returns a true value, the URL will be +skipped. If it returns false, it will not be restricted. + +=item A regexp reference + +If you give it a regexp reference, then the regexp will be applied to the +URL about to be spidered, if the regexp matches, then the URL will be +skipped. + +=back + +If called with no arguments, it will return the current list of restrictions. + +There are built-in restrictions that are always applied at the end of your +restriction list. One is a url regexp that matches your domain name, to +prevent the robot from leaving your site (it's qr{^\w+://YOUR_DOMAIN/}). +The other is a restriction that excludes any URLs that are not allowed by +your robots.txt. This module doesn't provide any method for ignoring the +robots.txt restriction (because it's dangerous), you should really modify +your robots.txt to allow this robot to bypass any of the restrictions you +don't want it to honor. + +For example, if your robot.txt contains: + + User-Agent: * + Disallow: /admin + Disallow: /google-stuff + +Then those two paths will not be included in your sitemap. If you decided +you actually did want /google-stuff to appear in your sitemap, you could add +this to your robots.txt: + + User-Agent: WWWGoogleSiteMapRobot + Disallow: /admin + +=cut + +sub restrict { + my $self = shift; + + if(@_) { $self->{restrict} = \@_ } + unless($self->{restrict}) { $self->{restrict} = [] } + + return @{$self->{restrict}}; +} + +=item starting_url(); + +If called with one or more arguments, they are assumed to be URLs which will +seed the spider. The spider continues to run as long as there are URLs in +it's "TO-DO" list, this method simply adds items to that list. The arguments +to starting_url are just the filename part of the url, if you don't specify +one, it defaults to '/'. + +You can pass it either a list of URLs, or a list reference (so you can use +a list reference in the constructor.) + +=cut + +sub starting_url { + my $self = shift; + + if(@_) { + $self->{starting_url} = \@_; + $self->_populate_starting_urls; + } + unless($self->{starting_url}) { $self->{starting_url} = ['/'] } + return $self->{starting_url}; +} + +sub _populate_starting_urls { + my $self = shift; + my @populate = @_; + + unless(@populate) { @populate = $self->starting_url() } + + foreach(@populate) { + next unless $_; + if(ref($_)) { $self->_populate_starting_urls(@{$_}); next; } + $self->{storage}->{"http://".$self->domain.$_} ||= ''; + } +} + +=item delay(); + +Get or set the delay (in minutes) to wait between requests. The default is +1 minute, and if you want to hammer on your web server you can set this to +a value less than 1. + +=cut + +sub delay { + my $self = shift; + + if(@_) { $self->{delay} = shift } + + return $self->{delay} || 1; +} + +=item sitemap_file(); + +Sets the filename to save the L object to. This is +required. + +=cut + +sub sitemap_file { + my $self = shift; + + if(@_) { $self->{sitemap_file} = shift } + return $self->{sitemap_file}; +} + +=item sitemap_url(); + +Sets the url for the sitemap. This is optional, but if you specify it, then +the robot will notify Google (using L) after it +writes a new sitemap. + +=cut + +sub sitemap_url { + my $self = shift; + + if(@_) { $self->{sitemap_url} = shift } + return $self->{sitemap_url}; +} + +=item user_agent(); + +Set the User Agent that this robot uses to identify itself. The default is +'WWWGoogleSiteMapRobot/version' (unless you have subclassed this module, it's +actually the class name with special characters removed.) +Be careful about changing this while the robot is active (this includes +changing it between runs if you are storing the state) as this affects how +your robot interprets your robots.txt file. + +=cut + +sub user_agent { + my $self = shift; + + if(@_) { $self->{user_agent} = shift } + unless($self->{user_agent}) { + my $pkg = ref($self) || $self; + $pkg =~ s/\W//g; + $self->{user_agent} = join('/',$pkg,$VERSION); + } + return $self->{user_agent}; +} + +=item robot_rules(); + +Get or set the L object used to handle robots.txt. + +=cut + +sub robot_rules { + my $self = shift; + + if(@_) { $self->{robot_rules} = shift } + unless($self->{robot_rules}) { + $self->{robot_rules} = WWW::RobotRules->new($self->user_agent); + my $url = "http://".$self->domain."/robots.txt"; + my $mech = $self->mechanize(); + $mech->get($url); + $self->{robot_rules}->parse($url,$mech->content); + } + return $self->{robot_rules}; +} + +=item mechanize(); + +Get or set the L object used for retrieving web documents. + +=cut + +sub mechanize { + my $self = shift; + + if(@_) { $self->{mech} = shift } + unless($self->{mech}) { + $self->{mech} = WWW::Mechanize->new( + agent => $self->user_agent, + stack_depth => 1, + ); + } + return $self->{mech}; +} + +=item status_storage(); + +If you provide status_storage with a tied hash, it will be used to store the +state of the TO-DO list which includes the data needed to build the sitemap, +as well as the list of unvisited URLs. This means that the robot can continue +where it left off if it is interrupted for some reason before finishing, then +you don't have to re-spider the entire site. This is strongly recommended. + +You can use this with basically anything that can be implemented as a tied +hash, as long as it can handle fully-qualified URLs as keys, the values will +be simple scalars (it won't try to store references or anything like that +in the values.) + +Example: + + use WWW::Google::SiteMap::Robot; + use GDBM::File; + + tie my %storage, 'GDBM_File', '/tmp/my-robot-status', &GDBM_WRCREAT, 0640; + my $robot = WWW::Google::SiteMap::Robot->new( + restrict => qr{^http://www.jasonkohles.com/}, + starting_url => 'http://www.jasonkohles.com/index.html', + sitemap_file => '/var/www/html/sitemap.gz', + ); + +If you don't provide a tied hash to store the status in, it will be stored in +a normal (in-memory) hash. + +=cut + +sub status_storage { + my $self = shift; + + if(@_) { + $self->{storage} = shift; + # If the storage is changed, we might have lost our starting urls + $self->_populate_starting_urls; + } + unless($self->{storage}) { + $self->{storage} = {}; + $self->_populate_starting_urls; + } + return $self->{storage}; +} + +=item pending_urls(); + +Return a list of all the URLs that have been found, but have not yet been +visited. This may include URLs that will later be restricted, and will not +be visited. + +=cut + +sub pending_urls { + my $self = shift; + my $todo = $self->status_storage; + return grep { ! $todo->{$_} } keys %{$todo}; +} + +=item restricted_urls(); + +Return a list of all the URLs that are in the TO-DO list that have already +been tried, but were skipped because they were restricted. + +=cut + +sub restricted_urls { + my $self = shift; + $self->_url_data_match(qr/^RESTRICTED /o); +} + +=item visited_urls(); + +Return a list of all the URLs that have already been visited, and will be +included in the sitemap. + +=cut + +sub visited_urls { + my $self = shift; + $self->_url_data_match(qr/^OK /o); +} + +=item run(); + +Start the robot running. If you are building your robot into a larger +program that has to handle other tasks as well, then you can pass an integer +to run(), which will be the number of URLs to check (of course then you will +have to call it again later, probably in a loop, to make sure you get them +all.) Returns true if something was done, returns false if no pending URLs +were found in the TO-DO list. Calling start() again after it has returned +false is rather pointless. If you call it in a loop as part of a larger +program, you are also responsible for calling write_sitemap() after all the +data is collected. + +If called with no arguments (or a false argument) it will run until there are +no more URLs to process. + +=cut + +sub run { + my $self = shift; + my $count = shift; + + my $counter = $count; + my @waiting = $self->pending_urls; + my $mech = $self->mechanize; + while(1) { + sleep($self->delay * 60); # sleep first, because of all the nexts + unless(@waiting) { @waiting = $self->pending_urls } + if(my $url = shift(@waiting)) { + # first make sure we didn't already do it + next if $self->{storage}->{$url}; + # Then make sure it isn't restricted + if($self->_check_restrictions($url)) { + $self->{storage}->{$url} = 'RESTRICTED'; + next; + } + $mech->get($url); + if($mech->success) { + # extract the last modification time from the page + my $modtime = $mech->response->last_modified() + || (time - $mech->response->current_age); + $self->{storage}->{$url} = "SUCCESS $modtime"; + # add any links in the page to our todo list + foreach($mech->links) { + my $url = $_->url_abs; + $url =~ s/#[^#]+$//; + $self->{storage}->{$url} ||= ''; + } + } else { + $self->{storage}->{$url} = 'ERROR '.$mech->status(); + } + next; + } + + if($count) { + last unless $counter--; + } else { + last unless @waiting; + } + } + unless($count) { # if you are limiting, you have to do this part yourself + $self->write_sitemap() if $self->sitemap_file(); + } +} + +sub _check_restrictions { + my $self = shift; + my $url = shift; + + # some hard-coded restrictions for safety sake + if($url !~ /^(http|https):/) { + return 1; + } + + foreach my $r ($self->restrict) { + if(ref($r) eq 'Regexp' && $url =~ /$r/) { + return 1; + } + if(ref($r) eq 'CODE' && $r->($url)) { + return 1 + } + } + my $domain = $self->domain; + if($url !~ m{^\w+://$domain}o) { + return 1; + } + unless($self->robot_rules->allowed($url)) { + return 1; + } + return 0; +} + +=item write_sitemap(); + +Write out the sitemap (if a sitemap file was specified), and optionally notify +Google (if a sitemap url was specified). + +=cut + +sub write_sitemap { + my $self = shift; + + my $map = WWW::Google::SiteMap->new( + file => $self->sitemap_file, + pretty => 1, + ); + while(my($url,$val) = each(%{$self->{storage}})) { + next unless $val =~ /^SUCCESS /; + my(undef,$lastmod) = split(' ',$val); + $map->add(WWW::Google::SiteMap::URL->new( + loc => $url, + lastmod => $lastmod, + )); + } + $map->write; + + if($self->sitemap_url) { + my $ping = WWW::Google::SiteMap::Ping->new($self->sitemap_url); + $ping->submit; + } +} + +sub _url_data_match { + my $self = shift; + my $regexp = shift; + + my $todo = $self->status_storage; + return grep { + $todo->{$_} && $todo->{$_} =~ /^$regexp/o + } keys %{$todo}; +} + +=back + +=head1 EXAMPLE ROBOT + + #!/usr/bin/perl -w + ################## + use strict; + use warnings; + use lib 'lib'; + use WWW::Google::SiteMap::Robot; + use GDBM_File; + + foreach my $site (qw(www.example.com www.example2.com www.example3.com)) { + my $status = '/tmp/sitemap-robot-status.$site.db'; + tie my %storage, 'GDBM_File', $status, &GDBM_WRCREAT, 0640 + my $robot = WWW::Google::SiteMap::Robot->new( + domain => $site, + status_storage => \%storage, + sitemap_file => "/var/www/$site/sitemap.gz", + sitemap_url => "http://$site/sitemap.gz", + ); + $robot->run(); + } + +=head1 MODULE HOME PAGE + +The home page of this module is +L. This is where you +can always find the latest version, development versions, and bug reports. You +will also find a link there to report bugs. + +=head1 SEE ALSO + +L + +L + +L + +L + +L + +L + +=head1 AUTHOR + +Jason Kohles, Eemail@jasonkohles.comE + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2005 by Jason Kohles + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.8.4 or, +at your option, any later version of Perl 5 you may have available. + +=cut + +1; +__END__ diff --git a/lib/WWW/Google/SiteMap/URL.pm b/lib/WWW/Google/SiteMap/URL.pm new file mode 100644 index 0000000..5551245 --- /dev/null +++ b/lib/WWW/Google/SiteMap/URL.pm @@ -0,0 +1,294 @@ +package WWW::Google::SiteMap::URL; +use vars qw($VERSION); $VERSION = '1.10'; + +=head1 NAME + +WWW::Google::SiteMap::URL - DEPRECATED - See Search::Sitemap + +=head1 DEPRECATED + +Now that more search engines than just Google are supporting the Sitemap +protocol, the WWW::Google::SiteMap module has been renamed to +L. + +=head1 SYNOPSIS + + use WWW::Google::SiteMap; + +=head1 DESCRIPTION + +This is a helper class that supports L and +L. + +=cut + +=head1 METHODS + +=over 4 + +=cut + +use strict; +use warnings; +use Carp qw(carp croak); +use XML::Twig qw(); +use POSIX qw(strftime); +use HTML::Entities qw(encode_entities); + +=item new() + +=cut + +sub new { + my $class = shift; + my %opts = ref($_[0]) ? %{$_[0]} : @_; + + my $self = bless({}, $class); + while(my($key,$value) = each %opts) { $self->$key($value) } + return $self; +} + +=item loc() + +Change the URL associated with this object. For a L +this specifies the URL to add to the sitemap, for a +L, this is the URL to the sitemap. + +=cut + +sub loc { + shift->_doval('loc', sub { + local $_ = shift; + return unless defined; + return 'must be less than 2048 characters long' unless length($_) < 2048; + return 'must be a fully qualified url' unless m{^https?://}; + return; + }, @_); +} + +=item changefreq() + +Set the change frequency of the object. This field is not used in sitemap +indexes, only in sitemaps. + +=cut + +sub changefreq { + shift->_doval('changefreq', sub { + local $_ = shift; + my @values = qw(always hourly daily weekly monthly yearly never); + my $re = join('|',@values); + return unless defined; + return 'must be one of '.join(', ',@values) unless /^$re$/; + return; + }, @_); +} + +=item lastmod() + +Set the last modified time. You have to provide this as one of the following: + +=over 4 + +=item a complete ISO8601 time string + +A complete time string will be accepted in exactly this format: + + YYYY-MM-DDTHH:MM:SS+TZ:TZ + YYYY - 4-digit year + MM - 2-digit month (zero padded) + DD - 2-digit year (zero padded) + T - literal character 'T' + HH - 2-digit hour (24-hour, zero padded) + SS - 2-digit second (zero padded) + +TZ:TZ - Timezone offset (hours and minutes from GMT, 2-digit, zero padded) + +=item epoch time + +Seconds since the epoch, such as would be returned from time(). If you provide +an epoch time, then an appropriate ISO8601 time will be constructed with +gmtime() (which means the timezone offset will be +00:00). If anyone knows +of a way to determine the timezone offset of the current host that is +cross-platform and doesn't add dozens of dependencies then I might change this. + +=item an ISO8601 date (YYYY-MM-DD) + +A simple date in YYYY-MM-DD format. The time will be set to 00:00:00+00:00. + +=item a L object. + +If a L object is provided, then an appropriate timestamp will be +constructed from it. + +=item a L object. + +If given an L object, the last modified time will be +calculated from whatever time information is available in the response +headers. Currently this means either the Last-Modified header, or tue +current time - the current_age() calculated by the response object. +This is useful for building web crawlers. + +=back + +Note that in order to conserve memory, any of these items that you provide +will be converted to a complete ISO8601 time string when they are stored. +This means that if you pass an object to lastmod(), you can't get it back +out. If anyone actually has a need to get the objects back out, then I +might make a configuration option to store the objects internally. + +If you have suggestions for other types of date/time objects or formats +that would be usefule, let me know and I'll consider them. + +=cut + +sub lastmod { + my $self = shift; + + return $self->{lastmod} unless @_; + + my $value = shift; + if(ref($value)) { + if($value->isa('DateTime')) { # DateTime object + my($date,$tzoff) = $value->strftime("%Y-%m-%dT%T","%z"); + if($tzoff =~ /^([+-])?(\d\d):?(\d\d)/) { + $tzoff = ($1 || '+').$2.':'.($3||'00'); + } else { + $tzoff = '+00:00'; + } + $self->{lastmod} = $date.$tzoff; + } elsif($value->isa('HTTP::Response')) { + my $modtime = $value->last_modified() + || (time - $value->current_age()); + $self->{lastmod} = strftime("%Y-%m-%dT%T+00:00",gmtime($_)); + } + } else { + local $_ = $value; + if(/^\d+$/) { # epoch time + $self->{lastmod} = strftime("%Y-%m-%dT%T+00:00",gmtime($_)); + } elsif(/^\d\d\d\d-\d\d-\d\d$/) { + $self->{lastmod} = $_.'T00:00:00+00:00'; + } elsif(/^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d\+\d\d:\d\d$/) { + $self->{lastmod} = $_; + } + } + + return $self->{lastmod} if $self->{lastmod}; + $self->_err("'$_' is not a valid value for lastmod"); +} + +=item priority() + +Set the priority. This field is not used in sitemap indexes, only in sitemaps. + +=cut + +sub priority { + shift->_doval('priority', sub { + local $_ = shift; + return unless defined; + return 'must be a number' unless /^[\d\.]+$/; + return 'must be greater than 0.0' unless $_ >= 0.0; + return 'must be less than 1.0' unless $_ <= 1.0; + return; + }, @_); +} + +sub _doval { + my $self = shift; + my $var = shift; + my $valid = shift; + return $self->{$var} unless @_; + my $value = shift; + if(my $res = $valid->($value)) { + my $msg = "'$value' is not a valid value for $var: $res"; + if($self->{lenient}) { carp $msg } else { croak $msg } + } else { + $self->{$var} = $value; + } +} +sub _err { + my $self = shift; + + if($self->{lenient}) { carp @_ } else { croak @_ } +} + + +=item delete() + +Delete this object from the sitemap or the sitemap index. + +=cut + +sub delete { + my $self = shift; + for(keys %{$self}) { $self->{$_} = undef } +} + +=item lenient() + +If lenient contains a true value, then errors will not be fatal. + +=cut + +sub lenient { + my $self = shift; + $self->{lenient} = shift if @_; + return $self->{lenient}; +} + +sub as_elt { + my $self = shift; + my $type = shift || 'url'; + my @fields = @_; + unless(@fields) { @fields = qw(loc changefreq lastmod priority) } + my @elements = (); + foreach(@fields) { + my $val = $self->$_() || next; + if($_ eq 'loc') { + $val = XML::Twig::Elt->new('#PCDATA' => encode_entities($val)); + $val->set_asis(1); + } else { + $val = XML::Twig::Elt->new('#PCDATA' => $val); + } + push(@elements,$val->wrap_in($_)); + } + return XML::Twig::Elt->new($type, {}, @elements); +} + +=back + +=head1 MODULE HOME PAGE + +The home page of this module is +L. This is where you +can always find the latest version, development versions, and bug reports. You +will also find a link there to report bugs. + +=head1 SEE ALSO + +L + +L + +L + +L + +L + +=head1 AUTHOR + +Jason Kohles, Eemail@jasonkohles.comE + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2005 by Jason Kohles + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.8.4 or, +at your option, any later version of Perl 5 you may have available. + +=cut + +1; +__END__