#!/usr/local/bin/perl -w
#
# Generate a gzip-compressed XML sitemap for a WiKicker wiki.
#
# Reads the page index and the per-page cache records from the WiKicker
# data directory, builds one <url> entry per page (FrontPage first, then
# the remaining pages in sorted order), appends one entry per 50-page
# slice of the IndexPage listing, and pipes the document through gzip
# into "${sitemap_file}.gz".
#
# NOTE(review): the copy of this script that was reviewed had lost every
# angle-bracket construct (readline operators and XML tags).  They have
# been restored here following the sitemaps.org protocol; confirm the
# <urlset> namespace and the body of the final HTTP response against the
# deployed version.

use strict;
use WiKicker::File;
use WiKicker::URI;
use Storable qw(thaw);

my $db_home_dir  = '/home/www/var/WiKicker';
my $site         = 'http://www.somedomain.com/wiki.cgi';
my $sitemap_file = '/home/www/public_html/sitemap_wiki.xml';
my $GZIP         = '/usr/bin/gzip';

my $db_file_dir   = $db_home_dir . '/page';
my $db_cache_dir  = $db_home_dir . "/info/basic";
my $db_index_file = $db_file_dir . '/index';
my $db_touch_file = $db_home_dir . '/last-modified.txt';

# Age thresholds (in seconds) used to pick a <changefreq> value.
my $ONE_WEEK  = 60 * 60 * 24 * 7;
my $ONE_MONTH = 60 * 60 * 24 * 30;

# Number of pages listed on each IndexPage slice.
my $INDEX_PAGE_SIZE = 50;

# Site-wide modification time: first line of the touch file.
my $last_mtime;
{
    open(my $touch_fh, '<', $db_touch_file)
        or die "can't open $db_touch_file\n";
    $last_mtime = <$touch_fh>;
    close($touch_fh);

    # An empty touch file is treated as epoch 0 (what int(undef) yielded
    # before), but without the "uninitialized value" warning.
    $last_mtime = 0 unless defined $last_mtime;
    chomp($last_mtime);
}

my $current_time = time;

# Page list: FrontPage always comes first, everything else is sorted.
my @db_idx;
{
    open(my $index_fh, '<', $db_index_file)
        or die "Can't open ${db_index_file}.\n";
    my @names = <$index_fh>;
    close($index_fh) or die "Can't close ${db_index_file}\n";
    chomp(@names);

    # Filter with grep instead of "while (shift ...)": the shift loop
    # stopped at the first blank line or at a page literally named "0",
    # silently dropping every page listed after it.
    my @others = grep { length($_) && $_ ne 'FrontPage' } @names;
    @db_idx = ('FrontPage', sort @others);
}

my $total = @db_idx;

# NOTE(review): namespace restored from the sitemaps.org protocol -- confirm.
my $sitemap = <<'_EOS_';
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
_EOS_

# One <url> entry per wiki page.
for my $i (0 .. $#db_idx) {
    my $page_name = $db_idx[$i];
    my %url       = (priority => 0.5);

    my $key     = WiKicker::File::key_to_file_name_base64($page_name);
    my $db_path = sprintf("%s/%s", $db_file_dir, $key);
    die "Can't access to ${db_path} .\n" if (! -e $db_path);

    if ($page_name eq 'FrontPage') {
        $url{loc} = $site;
        $url{priority} += 0.2;
    }
    elsif ($page_name =~ /^IndexPage/) {
        # These pages get no <loc> here (the index listing is emitted
        # separately below).  <loc> is mandatory in a <url> entry, so
        # skip the page instead of writing an entry without one.
        next;
    }
    else {
        $url{loc} = sprintf("%s/%s.html", $site,
            WiKicker::URI::page_name_to_path_segments($page_name));
    }

    my $mtime = get_last_modified_time_by_cache_file($key);
    my $age   = $current_time - $mtime;
    $url{lastmod} = get_time_string($mtime);

    if ($age < $ONE_WEEK) {
        # Was misspelled 'dairy', which is not a valid changefreq value.
        $url{changefreq} = 'daily';
        $url{priority} += 0.1;
    }
    else {
        $url{changefreq} = ($age < $ONE_MONTH) ? 'weekly' : 'monthly';
    }

    # A page immediately followed (in sorted order) by one of its own
    # sub-pages ("Name/...") is a parent page and gets a small boost.
    # index() is used rather than a regex so that metacharacters in the
    # page name are compared literally.
    if ($i < $#db_idx) {
        my $next_page_name = $db_idx[$i + 1];
        $url{priority} += 0.1
            if (index($next_page_name, "${page_name}/") == 0);
    }

    $sitemap .= get_url_tag(%url);
}

# One <url> entry per slice of the page index listing.
{
    my %url = (
        lastmod    => get_time_string(int($last_mtime)),
        changefreq => 'always',    # was misspelled 'allways'
        priority   => 0.4,
    );

    for (my $offset = 0; $offset < $total; $offset += $INDEX_PAGE_SIZE) {
        $url{loc} = "$site/IndexPage.html";
        $url{loc} .= sprintf("?offset=%d", $offset) if ($offset != 0);
        $sitemap .= get_url_tag(%url);
    }

    # $url{loc} = "$site/RecentChanges.html";
    # $url{priority} = 0.3;
    # $sitemap .= get_url_tag(%url);
}

$sitemap .= "</urlset>\n";

# Compress through an external gzip process.  The command line is built
# only from the constants defined at the top of this file.
open(my $gzip_fh, '|-', "$GZIP >${sitemap_file}.gz")
    or die "can't open ${sitemap_file}.\n";
print {$gzip_fh} $sitemap;
close($gzip_fh) or die "can't close ${sitemap_file}\n";

# Minimal CGI response so the script can be triggered over HTTP.
print "Content-Type: text/html\n\n\n";

exit(0);

# get_time_string($epoch)
#   Format an epoch time as a W3C datetime in UTC,
#   e.g. "2005-06-07T08:09:10+00:00" (the format used for <lastmod>).
sub get_time_string {
    my $time = shift;

    my ($sec, $min, $hour, $mday, $mon, $year) = gmtime($time);

    return sprintf("%0.4d-%0.2d-%0.2dT%0.2d:%0.2d:%0.2d+00:00",
                   $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
}

# get_gtime_string($epoch)
#   Format an epoch time as an HTTP-style date in GMT,
#   e.g. "Tue, 07 Jun 2005 08:09:10 GMT".
#   Not called by the code in this file; kept for compatibility.
sub get_gtime_string {
    my $time = shift;

    my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($time);

    return sprintf("%s, %02d %s %d %02d:%02d:%02d GMT",
                   (qw(Sun Mon Tue Wed Thu Fri Sat))[$wday],
                   $mday,
                   (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec))[$mon],
                   $year + 1900,
                   $hour, $min, $sec);
}

# get_last_modified_time_by_cache_file($key)
#   Read the Storable-frozen record "$db_cache_dir/$key" and return its
#   {lastmodified} field.  Dies if the file cannot be read or the record
#   cannot be thawed.
sub get_last_modified_time_by_cache_file {
    my $key = shift;

    open(my $cache_fh, '<', "$db_cache_dir/$key")
        or die "can't read file $key.\n";
    binmode($cache_fh);
    my $frozen = do { local $/; <$cache_fh> };
    close($cache_fh);

    # thaw() returns undef on a corrupt record; fail with a clear message
    # instead of dereferencing undef.
    my $record = defined($frozen) ? thaw($frozen) : undef;
    die "can't thaw record $key.\n" unless (ref($record));

    return $record->{lastmodified};
}

# get_url_tag(%fields)
#   Build one <url> element from the optional fields loc, lastmod,
#   changefreq and priority.  Undefined fields are omitted, and so is a
#   priority of exactly 0.5.
#   NOTE(review): element names restored from the sitemaps.org protocol.
sub get_url_tag {
    my %p = @_;

    my $t = " <url>\n";
    $t .= sprintf("  <loc>%s</loc>\n", $p{loc})
        if (defined($p{loc}));
    $t .= sprintf("  <lastmod>%s</lastmod>\n", $p{lastmod})
        if (defined($p{lastmod}));
    $t .= sprintf("  <changefreq>%s</changefreq>\n", $p{changefreq})
        if (defined($p{changefreq}));
    $t .= sprintf("  <priority>%0.1f</priority>\n", $p{priority})
        if (defined($p{priority}) && $p{priority} != 0.5);
    $t .= " </url>\n";

    return $t;
}

# Local Variables:
# mode: cperl
# coding: utf-8
# End: