New script to gen_site

This commit is contained in:
nemunaire 2014-01-19 05:32:13 +01:00
parent 99a4edb33f
commit 1d7c89994d

250
gen_site.pl Normal file
View File

@ -0,0 +1,250 @@
#!/usr/bin/env perl
use v5.10.1;
use strict;
use warnings;
use threads;
use threads::shared;
use Getopt::Long;
our $baseurl = "http://localhost/";
our $baseadmin = "admin/";
our $basehome = "";
our $baseteams = "connected/";
our $threads = 5;
sub test
{
my $p = FicPage->new("http://localhost/connected/169/6");
say $p->getSavePath();
$p->fetch();
$p->toRightURL();
$p->treatLinks();
print $p->getNearLinks();
$p->save();
}
sub genHome()
{
mirror($baseurl . $basehome);
}
sub genTeam($)
{
my $team_id = shift;
my $p = FicPage->new($baseurl . $baseteams . $team_id);
$p->fetch();
my @ths;
for my $link ($p->getNearLinks()) {
push @ths, threads->create(\&mirror, $link) if ($link =~ /\Q$baseteams\E/);
}
for my $th (@ths) {
$th->join();
}
}
sub genFull()
{
my $p = FicPage->new($baseurl . $baseteams);
$p->fetch();
my @ths;
for my $link ($p->getNearLinks()) {
push @ths, threads->create(\&mirror, $link) if ($link =~ /\Q$baseteams\E/);
}
for my $th (@ths) {
$th->join();
}
}
sub parse($)
{
say shift;
}
sub mirror
{
my $p = FicPage->new($baseurl . $_[0]);
return if ($p->alreadySaved());
$p->fetch();
my @links = $p->getNearLinks();
$p->treatLinks();
$p->save();
for my $link (@links)
{
mirror($link) if ($_[0] ne $link);
}
}
# Parse arguments
my $help; my $deamon;
GetOptions ("threads|thread|t=i" => \$threads,
"baseadmin|ba=s" => \$baseadmin,
"basehome|bh=s" => \$basehome,
"baseteams|bt=s" => \$baseteams,
"deamon|d|?" => \$deamon,
"help|h|?" => \$help);
if ($deamon)
{
while(<>) {
parse($_);
}
}
elsif (@ARGV)
{
while (shift) {
parse($_);
}
}
else
{
my $th = threads->create(\&genHome);
genFull();
$th->join();
}
package FicPage;
use v5.10.1;
use strict;
use warnings;
use File::Basename;
use File::Path qw(make_path);
use HTTP::Request::Common qw(GET POST);
use LWP::UserAgent;
sub new
{
my $class = shift;
my $self = {
url => shift,
};
bless $self, $class;
return $self;
}
sub toRightURL($)
{
my $self = shift;
my $search = $self->{url};
$search =~ s!/$!!;
for my $url ($self->getNearLinks())
{
$url = $baseurl . $url;
if ($url =~ /^\Q$search\E/)
{
$url =~ s!^(\Q$search\E[^/]*).*$!$1/!;
$url .= "/" if ($url !~ /\.[a-z0-9]{2,4}$/);
$self->{url} = $url;
return $url;
}
}
return $self->{url};
}
sub getLinks($)
{
my $self = shift;
return $self->{content} =~ /(?:src|href)="([^"]+)"/g;
}
sub getNearLinks($)
{
my $self = shift;
return $self->{content} =~ /(?:src|href)="(?:\Q$baseurl\E|(?:\/?\.\.)+|\/)([^"]+)"/g;
}
sub treatLinks($)
{
my $self = shift;
$self->{content} =~ s!(src|href)="/( \Q$baseteams\E[^/]+/ | \Q$baseadmin\E | \Q$basehome\E)([^"]*)"!$1="/$3"!gx;
}
sub fetch($)
{
my $self = shift;
my $ua = LWP::UserAgent->new;
my $res = $ua->request(GET $self->{url});
$self->{content} = $res->content;
}
sub alreadySaved($;$)
{
my $self = shift;
my $path = $self->getSavePath(@_);
return -f $path || ( -d $path && -f "$path/index.html" );
}
sub getSavePath($;$)
{
my $self = shift;
my $basedir = shift // "outest";
# Convert URL to real directory path
my $path = $self->{url};
$path =~ s/^\Q$main::baseurl\E//;
return "$basedir/$path";
}
sub save($;$)
{
my $self = shift;
# Convert URL to real directory path
my $path = $self->getSavePath(@_);
eval
{
if ($path =~ /\.[a-z0-9]{2,4}$/)
{
eval {
make_path( dirname("$path") ) if (! -d dirname("$path") );
};
open my $fd, ">", $path or die "$path: $!";
print $fd $self->{content};
close $fd;
}
else
{
eval {
make_path "$path" if (! -d "$path");
};
open my $fd, ">", "$path/index.html" or die "$path: $!";
print $fd $self->{content};
close $fd;
}
};
print $@ if ($@);
}