libwww-google-sitemap-perl/lib/WWW/Google/SiteMap/URL.pm

295 lines
7.1 KiB
Perl

package WWW::Google::SiteMap::URL;
use vars qw($VERSION); $VERSION = '1.10';
=head1 NAME
WWW::Google::SiteMap::URL - DEPRECATED - See Search::Sitemap
=head1 DEPRECATED
Now that more search engines than just Google are supporting the Sitemap
protocol, the WWW::Google::SiteMap module has been renamed to
L<Search::Sitemap>.
=head1 SYNOPSIS
use WWW::Google::SiteMap;
=head1 DESCRIPTION
This is a helper class that supports L<WWW::Google::SiteMap> and
L<WWW::Google::SiteMap::Index>.
=cut
=head1 METHODS
=over 4
=cut
use strict;
use warnings;
use Carp qw(carp croak);
use XML::Twig qw();
use POSIX qw(strftime);
use HTML::Entities qw(encode_entities);
=item new()
=cut
sub new {
my $class = shift;
my %opts = ref($_[0]) ? %{$_[0]} : @_;
my $self = bless({}, $class);
while(my($key,$value) = each %opts) { $self->$key($value) }
return $self;
}
=item loc()
Change the URL associated with this object. For a L<WWW::Google::SiteMap>
this specifies the URL to add to the sitemap, for a
L<WWW::Google::SiteMap::Index>, this is the URL to the sitemap.
=cut
sub loc {
shift->_doval('loc', sub {
local $_ = shift;
return unless defined;
return 'must be less than 2048 characters long' unless length($_) < 2048;
return 'must be a fully qualified url' unless m{^https?://};
return;
}, @_);
}
=item changefreq()
Set the change frequency of the object. This field is not used in sitemap
indexes, only in sitemaps.
=cut
sub changefreq {
shift->_doval('changefreq', sub {
local $_ = shift;
my @values = qw(always hourly daily weekly monthly yearly never);
my $re = join('|',@values);
return unless defined;
return 'must be one of '.join(', ',@values) unless /^$re$/;
return;
}, @_);
}
=item lastmod()
Set the last modified time. You have to provide this as one of the following:
=over 4
=item a complete ISO8601 time string
A complete time string will be accepted in exactly this format:
YYYY-MM-DDTHH:MM:SS+TZ:TZ
YYYY - 4-digit year
MM - 2-digit month (zero padded)
DD - 2-digit year (zero padded)
T - literal character 'T'
HH - 2-digit hour (24-hour, zero padded)
SS - 2-digit second (zero padded)
+TZ:TZ - Timezone offset (hours and minutes from GMT, 2-digit, zero padded)
=item epoch time
Seconds since the epoch, such as would be returned from time(). If you provide
an epoch time, then an appropriate ISO8601 time will be constructed with
gmtime() (which means the timezone offset will be +00:00). If anyone knows
of a way to determine the timezone offset of the current host that is
cross-platform and doesn't add dozens of dependencies then I might change this.
=item an ISO8601 date (YYYY-MM-DD)
A simple date in YYYY-MM-DD format. The time will be set to 00:00:00+00:00.
=item a L<DateTime> object.
If a L<DateTime> object is provided, then an appropriate timestamp will be
constructed from it.
=item a L<HTTP::Response> object.
If given an L<HTTP::Response> object, the last modified time will be
calculated from whatever time information is available in the response
headers. Currently this means either the Last-Modified header, or tue
current time - the current_age() calculated by the response object.
This is useful for building web crawlers.
=back
Note that in order to conserve memory, any of these items that you provide
will be converted to a complete ISO8601 time string when they are stored.
This means that if you pass an object to lastmod(), you can't get it back
out. If anyone actually has a need to get the objects back out, then I
might make a configuration option to store the objects internally.
If you have suggestions for other types of date/time objects or formats
that would be usefule, let me know and I'll consider them.
=cut
sub lastmod {
my $self = shift;
return $self->{lastmod} unless @_;
my $value = shift;
if(ref($value)) {
if($value->isa('DateTime')) { # DateTime object
my($date,$tzoff) = $value->strftime("%Y-%m-%dT%T","%z");
if($tzoff =~ /^([+-])?(\d\d):?(\d\d)/) {
$tzoff = ($1 || '+').$2.':'.($3||'00');
} else {
$tzoff = '+00:00';
}
$self->{lastmod} = $date.$tzoff;
} elsif($value->isa('HTTP::Response')) {
my $modtime = $value->last_modified()
|| (time - $value->current_age());
$self->{lastmod} = strftime("%Y-%m-%dT%T+00:00",gmtime($_));
}
} else {
local $_ = $value;
if(/^\d+$/) { # epoch time
$self->{lastmod} = strftime("%Y-%m-%dT%T+00:00",gmtime($_));
} elsif(/^\d\d\d\d-\d\d-\d\d$/) {
$self->{lastmod} = $_.'T00:00:00+00:00';
} elsif(/^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d\+\d\d:\d\d$/) {
$self->{lastmod} = $_;
}
}
return $self->{lastmod} if $self->{lastmod};
$self->_err("'$_' is not a valid value for lastmod");
}
=item priority()
Set the priority. This field is not used in sitemap indexes, only in sitemaps.
=cut
sub priority {
shift->_doval('priority', sub {
local $_ = shift;
return unless defined;
return 'must be a number' unless /^[\d\.]+$/;
return 'must be greater than 0.0' unless $_ >= 0.0;
return 'must be less than 1.0' unless $_ <= 1.0;
return;
}, @_);
}
sub _doval {
my $self = shift;
my $var = shift;
my $valid = shift;
return $self->{$var} unless @_;
my $value = shift;
if(my $res = $valid->($value)) {
my $msg = "'$value' is not a valid value for $var: $res";
if($self->{lenient}) { carp $msg } else { croak $msg }
} else {
$self->{$var} = $value;
}
}
sub _err {
my $self = shift;
if($self->{lenient}) { carp @_ } else { croak @_ }
}
=item delete()
Delete this object from the sitemap or the sitemap index.
=cut
sub delete {
my $self = shift;
for(keys %{$self}) { $self->{$_} = undef }
}
=item lenient()
If lenient contains a true value, then errors will not be fatal.
=cut
sub lenient {
my $self = shift;
$self->{lenient} = shift if @_;
return $self->{lenient};
}
sub as_elt {
my $self = shift;
my $type = shift || 'url';
my @fields = @_;
unless(@fields) { @fields = qw(loc changefreq lastmod priority) }
my @elements = ();
foreach(@fields) {
my $val = $self->$_() || next;
if($_ eq 'loc') {
$val = XML::Twig::Elt->new('#PCDATA' => encode_entities($val));
$val->set_asis(1);
} else {
$val = XML::Twig::Elt->new('#PCDATA' => $val);
}
push(@elements,$val->wrap_in($_));
}
return XML::Twig::Elt->new($type, {}, @elements);
}
=back
=head1 MODULE HOME PAGE
The home page of this module is
L<http://www.jasonkohles.com/software/WWW-Google-SiteMap>. This is where you
can always find the latest version, development versions, and bug reports. You
will also find a link there to report bugs.
=head1 SEE ALSO
L<WWW::Google::SiteMap>
L<WWW::Google::SiteMap::Index>
L<WWW::Google::SiteMap::Ping>
L<http://www.jasonkohles.com/software/WWW-Google-SiteMap/>
L<https://www.google.com/webmasters/sitemaps/docs/en/protocol.html>
=head1 AUTHOR
Jason Kohles, E<lt>email@jasonkohles.comE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2005 by Jason Kohles
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.4 or,
at your option, any later version of Perl 5 you may have available.
=cut
1;
__END__