server/gen_site.pl

543 lines
8.3 KiB
Perl
Raw Normal View History

2014-01-19 04:32:13 +00:00
#!/usr/bin/env perl
use v5.10.1;
use strict;
use warnings;
use threads;
use threads::shared;
2014-01-19 13:51:18 +00:00
2014-01-19 04:32:13 +00:00
use Getopt::Long;
2014-01-19 13:51:18 +00:00
use Thread::Queue;
our $outdir = "outest";
2014-01-19 04:32:13 +00:00
2014-01-19 13:51:18 +00:00
our $baseurl = "http://localhost";
our $baseadmin = "/admin/";
our $basehome = "/";
our $baseteams = "/connected/";
our $threads = 6;
2014-01-19 04:32:13 +00:00
2014-01-19 13:51:18 +00:00
sub genHome(;$)
2014-01-19 04:32:13 +00:00
{
2014-01-19 13:51:18 +00:00
my $m = shift // Mirror->new();
$m->add_url($basehome);
return $m;
2014-01-19 04:32:13 +00:00
}
2014-01-19 13:51:18 +00:00
sub genFull(;$)
2014-01-19 04:32:13 +00:00
{
2014-01-19 13:51:18 +00:00
my $m = shift // Mirror->new();
$m->add_url($baseteams);
return $m;
2014-01-19 04:32:13 +00:00
}
2014-01-19 13:51:18 +00:00
sub genTeam($;$)
2014-01-19 04:32:13 +00:00
{
my $team_id = shift;
2014-01-19 13:51:18 +00:00
my $m = shift // Mirror->new();
$m->add_url($baseteams . $team_id);
2014-01-19 04:32:13 +00:00
2014-01-19 13:51:18 +00:00
return $m;
}
2014-01-19 04:32:13 +00:00
2014-01-19 13:51:18 +00:00
sub genTeamTheme($$;$)
{
my $team_id = shift;
my $theme_id = shift;
my $m = shift // Mirror->new();
2014-01-19 04:32:13 +00:00
2014-01-19 13:51:18 +00:00
$m->add_url($baseteams . $team_id . "/" . $theme_id);
2014-01-19 04:32:13 +00:00
2014-01-19 13:51:18 +00:00
return $m;
2014-01-19 04:32:13 +00:00
}
2014-01-19 13:51:18 +00:00
my $queue :shared = Thread::Queue->new();
my $main_thread;
sub manage
2014-01-19 04:32:13 +00:00
{
2014-01-19 13:51:18 +00:00
my $m = shift // Mirror->new();
2014-01-19 04:32:13 +00:00
2014-01-19 13:51:18 +00:00
while (1)
{
if ($queue->pending() <= 0)
{
lock($queue);
cond_wait($queue) while $queue->pending() <= 0;
}
2014-01-19 04:32:13 +00:00
2014-01-19 13:51:18 +00:00
my $cmd;
{
lock($queue);
$cmd = $queue->peek();
}
2014-01-19 04:32:13 +00:00
2014-01-19 13:51:18 +00:00
$m->{end} = 0 if ($m->{end});
2014-01-19 04:32:13 +00:00
2014-01-19 13:51:18 +00:00
for ($cmd)
{
if (/^all$/)
{
say "Generate all teams";
genFull($m);
}
elsif (/^HOME$/)
{
say "Generate full public part";
genHome($m);
}
elsif (/^TEAM([0-9]+)$/)
{
say "Generate team: $1";
genTeam($1, $m);
}
elsif (/^TEAM([0-9]+),([0-9]+)$/)
{
say "Generate team theme: $1/$2";
genTeamTheme($1, $2, $m);
}
elsif (/^(reset)*r(e(s(e(t)?)?)?)?/)
{
say "Performing RESET ...";
$m->reset();
}
elsif (/^J$/)
{
say "JOIN receive, stopping all threads...";
$m->stop();
return 1;
}
elsif (/^help/i)
{
say "TODO, sorry :(";
}
else {
say "$cmd is not a valid command";
}
}
next if ! $m->join();
say ">>> $cmd done";
{
lock($queue);
my $dq = $queue->dequeue();
if ($cmd ne $dq)
{
$queue->insert(0, $dq);
for (my $i = 0; $i < $queue->pending(); $i++)
{
if ($queue->peek($i) eq $cmd)
{
$queue->extract($i);
last;
}
}
}
}
}
2014-01-19 04:32:13 +00:00
}
2014-01-19 13:51:18 +00:00
sub parse($$)
2014-01-19 04:32:13 +00:00
{
2014-01-19 13:51:18 +00:00
my $m = shift;
my $change_current = 0;
2014-01-19 04:32:13 +00:00
2014-01-19 13:51:18 +00:00
for my $cmd ($_[0] =~ /([^:]+)/g)
{
my $len = length($cmd);
2014-01-19 04:32:13 +00:00
2014-01-19 13:51:18 +00:00
# Search the right position
my $i;
for ($i = 0; $i < $queue->pending(); $i++)
{
last if ($len > length($queue->peek($i)));
}
#say "Inserting $cmd at position $i/".$queue->pending();
$queue->insert($i, $cmd);
$change_current = 1 if $i == 0 && $queue->pending() != 1;
}
2014-01-19 04:32:13 +00:00
2014-01-19 13:51:18 +00:00
if ($change_current)
{
say "Priority item have changed, stoping running threads";
$m->end();
}
2014-01-19 04:32:13 +00:00
{
2014-01-19 13:51:18 +00:00
lock($queue);
cond_broadcast($queue);
2014-01-19 04:32:13 +00:00
}
2014-01-19 13:51:18 +00:00
#print Dumper($queue);
2014-01-19 04:32:13 +00:00
}
2014-01-19 13:51:18 +00:00
2014-01-19 04:32:13 +00:00
# Parse arguments
my $help; my $deamon;
GetOptions ("threads|thread|t=i" => \$threads,
"baseadmin|ba=s" => \$baseadmin,
"basehome|bh=s" => \$basehome,
"baseteams|bt=s" => \$baseteams,
2014-01-19 13:51:18 +00:00
"outdir|out|o=s" => \$outdir,
"deamon|d" => \$deamon,
2014-01-19 04:32:13 +00:00
"help|h|?" => \$help);
if ($deamon)
{
2014-01-19 13:51:18 +00:00
my $m :shared = Mirror->new();
$main_thread = threads->create(\&manage, $m);
2014-01-19 04:32:13 +00:00
while(<>) {
2014-01-19 13:51:18 +00:00
chomp $_;
parse($m, $_);
2014-01-19 04:32:13 +00:00
}
2014-01-19 13:51:18 +00:00
parse($m, "J");
$main_thread->join();
2014-01-19 04:32:13 +00:00
}
elsif (@ARGV)
{
2014-01-19 13:51:18 +00:00
my $m :shared = Mirror->new();
$main_thread = threads->create(\&manage, $m);
while ($_ = shift) {
parse($m, $_);
2014-01-19 04:32:13 +00:00
}
2014-01-19 13:51:18 +00:00
parse($m, "J");
$main_thread->join();
2014-01-19 04:32:13 +00:00
}
else
{
2014-01-19 13:51:18 +00:00
my $m = genFull();
genHome($m);
$m->join();
}
package Mirror;
use v5.10.1;
use strict;
use warnings;
use threads;
use threads::shared;
use Thread::Queue;
sub new($)
{
my $class = shift;
my $self :shared = shared_clone({
threads => [],
stop => 0,
});
bless $self, $class;
$self->start();
return $self;
}
sub start($)
{
my $self = shift;
$self->{add} = Thread::Queue->new("#RESET");
$self->{todo} = Thread::Queue->new();
$self->{waiting} = 0;
$self->{end} = 0;
push @{ $self->{threads} }, threads->create(\&run_add, $self)->tid();
for (my $i = 0; $i < $main::threads; $i++) {
push @{ $self->{threads} }, threads->create(\&run, $self, $i + 1)->tid();
}
}
sub reset($)
{
my $self = shift;
$self->{add}->enqueue("#RESET");
}
sub end($)
{
my $self = shift;
$self->{end} = 1;
while($self->{add}->dequeue_nb()) { }
while($self->{todo}->dequeue_nb()) { }
$self->reset();
{
lock($self);
2014-01-19 04:32:13 +00:00
2014-01-19 13:51:18 +00:00
cond_broadcast($self);
}
return $self->join();
2014-01-19 04:32:13 +00:00
}
2014-01-19 13:51:18 +00:00
sub stop($)
{
my $self = shift;
$self->{stop} = 1;
return $self->end();
}
sub add_url($@)
{
my $self = shift;
while (my $link = shift)
{
$self->{add}->enqueue($link);
}
}
sub run_add
{
my $self = shift;
my $id = shift // 0;
my %urlseen;
while (! $self->{stop})
{
my $link = $self->{add}->dequeue_nb();
if (! defined $link)
{
{
lock($self);
$self->{waiting} += 1;
cond_broadcast($self);
}
$link = $self->{add}->dequeue();
last if (! defined $link);
{
lock($self);
$self->{waiting} -= 1;
}
}
next if (exists $urlseen{$link});
if ($link eq "#RESET")
{
%urlseen = ();
}
else
{
$self->{todo}->enqueue($link);
$urlseen{$link} = 1;
}
}
}
sub run
{
my $self = shift;
my $id = shift // 0;
while (! $self->{stop})
{
my $url = $self->{todo}->dequeue_nb();
if (! defined $url)
{
{
lock($self);
$self->{waiting} += 1;
cond_broadcast($self);
}
$url = $self->{todo}->dequeue();
last if (! defined $url);
{
lock($self);
$self->{waiting} -= 1;
}
}
say "[$id] $baseurl$url";
my $p = FicPage->new($baseurl . $url);
$p->fetch();
$p->toRightURL() if ($url !~ /\/$/);
my @links = $p->getNearLinks() if ! $self->{end};
$p->treatLinks();
$p->save();
for my $link (@links)
{
$self->add_url($link) if ($link ne $url && $link =~ /^?\Q$url\E/);
}
}
}
sub join($)
{
my $self = shift;
sleep 1 if ($self->{waiting} == $main::threads + 1);
return ($self->{waiting} > 0 && ! $self->{end}) if ($self->{waiting} == $main::threads + 1);
{
lock($self);
cond_wait($self) until ($self->{waiting} >= $main::threads + 1 || $self->{waiting} < 0 || $self->{stop});
}
return ($self->{waiting} > 0 && ! $self->{end});
}
2014-01-19 04:32:13 +00:00
package FicPage;
use v5.10.1;
use strict;
use warnings;
use File::Basename;
use File::Path qw(make_path);
use HTTP::Request::Common qw(GET POST);
use LWP::UserAgent;
sub new
{
my $class = shift;
my $self = {
url => shift,
};
bless $self, $class;
return $self;
}
sub toRightURL($)
{
my $self = shift;
my $search = $self->{url};
$search =~ s!/$!!;
2014-01-19 13:51:18 +00:00
for my $url ($self->getLinks())
2014-01-19 04:32:13 +00:00
{
$url = $baseurl . $url;
if ($url =~ /^\Q$search\E/)
{
$url =~ s!^(\Q$search\E[^/]*).*$!$1/!;
$self->{url} = $url;
return $url;
}
}
return $self->{url};
}
sub getLinks($)
{
my $self = shift;
return $self->{content} =~ /(?:src|href)="([^"]+)"/g;
}
sub getNearLinks($)
{
my $self = shift;
2014-01-19 13:51:18 +00:00
return $self->{content} =~ /(?:src|href)="(?:\Q$main::baseurl\E|(?:\/?\.\.)+)?([^:"]+)"/g;
2014-01-19 04:32:13 +00:00
}
sub treatLinks($)
{
my $self = shift;
2014-01-19 13:51:18 +00:00
$self->{content} =~ s!(src|href)="( \Q$baseteams\E[^/]+/ | \Q$baseadmin\E | \Q$basehome\E)([^"]*)"!$1="/$3"!gx;
2014-01-19 04:32:13 +00:00
}
sub fetch($)
{
my $self = shift;
my $ua = LWP::UserAgent->new;
my $res = $ua->request(GET $self->{url});
$self->{content} = $res->content;
}
sub alreadySaved($;$)
{
my $self = shift;
my $path = $self->getSavePath(@_);
return -f $path || ( -d $path && -f "$path/index.html" );
}
sub getSavePath($;$)
{
my $self = shift;
2014-01-19 13:51:18 +00:00
my $basedir = shift // $main::outdir;
2014-01-19 04:32:13 +00:00
# Convert URL to real directory path
my $path = $self->{url};
$path =~ s/^\Q$main::baseurl\E//;
2014-01-19 13:51:18 +00:00
return "$basedir$path";
2014-01-19 04:32:13 +00:00
}
sub save($;$)
{
my $self = shift;
# Convert URL to real directory path
my $path = $self->getSavePath(@_);
eval
{
if ($path =~ /\.[a-z0-9]{2,4}$/)
{
eval {
make_path( dirname("$path") ) if (! -d dirname("$path") );
};
open my $fd, ">", $path or die "$path: $!";
print $fd $self->{content};
close $fd;
}
else
{
eval {
make_path "$path" if (! -d "$path");
};
open my $fd, ">", "$path/index.html" or die "$path: $!";
print $fd $self->{content};
close $fd;
}
};
print $@ if ($@);
}