2014-01-19 04:32:13 +00:00
|
|
|
#!/usr/bin/env perl
|
2014-11-20 14:39:54 +00:00
|
|
|
#=============================================================================
|
|
|
|
#
|
|
|
|
# USAGE: ./gen_site.pl [options] [commands]
|
|
|
|
#
|
|
|
|
# DESCRIPTION: More efficient wget -m
|
|
|
|
#
|
|
|
|
# AUTHOR: Pierre-Olivier Mercier <nemunaire@epita.fr>
|
|
|
|
# ORGANIZATION: EPITA SRS
|
|
|
|
#
|
|
|
|
#=============================================================================
|
2014-01-19 04:32:13 +00:00
|
|
|
|
|
|
|
use v5.10.1;
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
use threads;
|
|
|
|
use threads::shared;
|
2014-01-19 13:51:18 +00:00
|
|
|
|
2014-01-19 16:49:07 +00:00
|
|
|
use Cwd 'abs_path';
|
|
|
|
use File::Basename;
|
|
|
|
use File::Copy;
|
|
|
|
use File::Find;
|
|
|
|
use File::Path qw/make_path remove_tree/;
|
|
|
|
use File::Temp "tempdir";
|
2014-01-19 04:32:13 +00:00
|
|
|
use Getopt::Long;
|
2014-01-20 04:53:31 +00:00
|
|
|
use IO::Socket;
|
2014-01-19 13:51:18 +00:00
|
|
|
use Thread::Queue;
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
### GLOBALS ###########################################################

# Layout of the generated (published) output directory
our $outdir = "outest";          # destination root (overridable with -outdir)
our $outteams = "/teams/";       # where team pages land inside $outdir
our $outerrors = "/errors/";     # where error pages land inside $outdir
our $outhome = "/htdocs/";       # where the public part lands inside $outdir

# Scratch directory where pages are mirrored before synchronization
our $tmpdir = tempdir();

# Layout of the site being mirrored
our $baseurl = "http://localhost";
our $baseadmin = "/admin/";      # admin area: crawled but never published
our $basehome = "/";             # public area
our $baseerrors = "/errors/";    # error pages area
our $baseteams = "/connected/";  # per-team area

# Number of parallel fetcher threads per Mirror
our $threads = 6;

# Command-line switches.
# NOTE(review): "deamon" is a misspelling of "daemon", but it is part of
# the CLI interface ("deamon|d" in GetOptions) so it is kept as-is.
my $deamon;
my $socket;

# Scheduler queue of pending commands, shared between all threads
my $queue :shared = Thread::Queue->new();

# Thread running manage(); joined before exiting daemon/command modes
my $main_thread;
|
|
|
|
|
|
|
|
|
|
|
|
### GENERATORS ########################################################
|
|
|
|
|
|
|
|
# Enqueue error pages in the mirror
|
2014-01-20 08:58:01 +00:00
|
|
|
# Enqueue the error pages in the mirror.
# Takes an optional Mirror (a fresh one is created otherwise) and
# returns it, so calls can be chained.
sub genErrors(;$)
{
    my $mirror = shift // Mirror->new();

    # HTTP status codes for which the server exposes an error page
    $mirror->add_url( $baseerrors . $_ ) for qw(400 403 404 413 500 502);

    return $mirror;
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Enqueue public pages in the mirror
|
2014-01-19 13:51:18 +00:00
|
|
|
# Enqueue the public pages in the mirror (plus the error pages, which
# belong to the public part as well). Returns the Mirror used.
sub genHome(;$)
{
    my $mirror = shift // Mirror->new();

    # The landing page is reachable both by name and as the root
    $mirror->add_url( $basehome . "index.html", $basehome );

    genErrors($mirror);

    return $mirror;
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Enqueue all teams in the mirror
|
2014-01-19 13:51:18 +00:00
|
|
|
# Enqueue all teams in the mirror: the teams index page is the crawl
# seed from which every team page is discovered. Returns the Mirror.
sub genFull(;$)
{
    my $mirror = shift // Mirror->new();

    $mirror->add_url($baseteams);

    return $mirror;
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Enqueue a team in the mirror
|
2014-01-19 13:51:18 +00:00
|
|
|
# Enqueue one team's root page in the mirror.
# $team_id: numeric team identifier; optional Mirror as second argument.
sub genTeam($;$)
{
    my ($team_id, $mirror) = @_;
    $mirror //= Mirror->new();

    $mirror->add_url( $baseteams . $team_id );

    return $mirror;
}
|
2014-01-19 04:32:13 +00:00
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Enqueue theme pages for a given team in the mirror
|
2014-01-19 13:51:18 +00:00
|
|
|
# Enqueue the theme pages of a given team in the mirror.
# $team_id and $theme_id are numeric identifiers; optional Mirror third.
sub genTeamTheme($$;$)
{
    my ($team_id, $theme_id, $mirror) = @_;
    $mirror //= Mirror->new();

    $mirror->add_url( sprintf "%s%s/%s", $baseteams, $team_id, $theme_id );

    return $mirror;
}
|
|
|
|
|
2014-01-19 13:51:18 +00:00
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
### TOOLS #############################################################
|
2014-01-19 13:51:18 +00:00
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Manage the mirror
|
2014-01-19 13:51:18 +00:00
|
|
|
# Manage the mirror: the scheduler loop. Waits for commands on the
# shared $queue, translates each one into URL generation on the Mirror,
# waits for the fetchers to drain, then removes the command from the
# queue. Only a JOIN command makes this return (with 1).
sub manage
{
    my $m = shift // Mirror->new();

    while (1)
    {
        # Block until at least one command is queued
        if ($queue->pending() <= 0)
        {
            lock($queue);
            cond_wait($queue) while $queue->pending() <= 0;
        }

        # Peek only -- the command stays in the queue as "in progress"
        # until its generation is fully done
        my $cmd;
        {
            lock($queue);
            $cmd = $queue->peek();
        }

        # Clear a pending interruption left over from a previous command
        $m->{end} = 0 if ($m->{end});

        # Dispatch the command (several accept abbreviated forms)
        for ($cmd)
        {
            if (/^all$/)
            {
                say "Generate all teams";
                genFull($m);
            }
            elsif (/^HOME/)
            {
                say "Generate full public part";
                genHome($m);
            }
            elsif (/^ERRORS?/)
            {
                say "Generate errors pages";
                genErrors($m);
            }
            elsif (/^TEAM([0-9]+)$/)
            {
                say "Generate team: $1";
                genTeam($1, $m);
            }
            elsif (/^TEAM([0-9]+),([0-9]+)$/)
            {
                say "Generate team theme: $1/$2";
                genTeamTheme($1, $2, $m);
            }
            elsif (/^(reset)*r(e(s(e(t)?)?)?)?/)
            {
                # Wipe the temporary mirror and forget all seen URLs
                say "Performing RESET ...";
                remove_tree($main::tmpdir);
                mkdir($main::tmpdir);
                $m->reset();
            }
            elsif (/^(D)?(SYNC)*S(Y(N(C)?)?)?/)
            {
                # "DSYNC" requests a destructive/full sync ($1 holds the
                # "D"); a plain SYNC is incremental
                sync($1);
            }
            elsif (/^LS$/)
            {
                # List the temporary mirror content
                system("ls '$main::tmpdir'");
            }
            elsif (/^J(O(I(N)?)?)?$/)
            {
                say "JOIN receive, stopping all threads...";
                $m->stop();
                return 1;
            }
            elsif (/^RT(E(A(M(S)?)?)?)?/)
            {
                # Regenerate the nginx file containing team IDs
                if (-x "nginx_gen_team.sh") {
                    qx(./nginx_gen_team.sh > ./misc/shared/nginx-teams.conf)
                } else {
                    say "Unable to find nginx_gen_team.sh"
                }
            }
            elsif (/^help/i)
            {
                say "TODO, sorry :(";
            }
            else {
                say "$cmd is not a valid command";
            }
        }

        # Wait for the fetchers; a false return means the generation was
        # interrupted (a higher-priority command arrived), so loop again
        next if ! $m->join();

        say ">>> $cmd done";
        {
            lock($queue);

            my $dq = $queue->dequeue();
            # If new commands were inserted ahead of ours in the
            # meantime, put the head back and extract our command from
            # wherever it now sits in the queue
            if ($cmd ne $dq)
            {
                $queue->insert(0, $dq);

                for (my $i = 0; $i < $queue->pending(); $i++)
                {
                    if ($queue->peek($i) eq $cmd)
                    {
                        $queue->extract($i);
                        last;
                    }
                }
            }
        }
    }
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Perform a synchronization of the temporary mirrored directory
|
2014-01-19 16:49:07 +00:00
|
|
|
# Perform a synchronization of the temporary mirrored directory into
# $main::outdir. With a true argument, do a destructive ("full") sync:
# rebuild the whole output directory from the temporary mirror.
# Otherwise do an incremental sync: copy files over the existing output
# directory without removing anything.
# Each mirrored area is remapped: admin pages are never published,
# teams/errors/home areas go to their respective out* subdirectories.
sub sync
{
    if (shift)
    {
        say "Full synchronization to $main::outdir";

        # Stage the reorganized tree in a second temporary directory so
        # the real output directory is replaced as late as possible
        my $tmpcopy = tempdir();

        find(
            sub
            {
                if (-f)
                {
                    my $todir = $File::Find::dir."/";

                    if ($todir =~ /^\Q$main::tmpdir\E\/?(\Q$main::baseadmin\E|\Q$main::baseteams\E|\Q$main::baseerrors\E|\Q$main::basehome\E)(.*)$/)
                    {
                        $todir = $tmpcopy;

                        # Admin pages are mirrored but never published
                        return if ($1 eq $main::baseadmin);
                        $todir .= $main::outteams if ($1 eq $main::baseteams);
                        $todir .= $main::outerrors if ($1 eq $main::baseerrors);
                        $todir .= $main::outhome if ($1 eq $main::basehome);

                        $todir .= $2;
                    }

                    make_path($todir, { mode => 0751 }) if (! -d $todir );

                    copy($File::Find::name, $todir) or warn(q{copy failed:} . $!);
                }
            },
            $tmpdir
        );

        abs_path($main::outdir);
        abs_path($tmpcopy);

        # Empty the output directory, then move the staged tree in place
        remove_tree($main::outdir, {keep_root => 1});

        system("mv '$tmpcopy'/* '$main::outdir/'");
    }
    else
    {
        say "Incremental synchronization to $main::outdir";

        find(
            sub
            {
                if (-f)
                {
                    my $todir = $File::Find::dir."/";
                    # BUGFIX: $main::baseerrors was missing from this
                    # alternation, so the `$1 eq $main::baseerrors` test
                    # below could never match and error pages were
                    # mis-filed under the home area (basehome "/"
                    # matches everything). Now identical to the
                    # full-sync branch above.
                    if ($todir =~ /^\Q$main::tmpdir\E\/?(\Q$main::baseadmin\E|\Q$main::baseteams\E|\Q$main::baseerrors\E|\Q$main::basehome\E)(.*)$/)
                    {
                        $todir = $main::outdir;

                        # Admin pages are mirrored but never published
                        return if ($1 eq $main::baseadmin);
                        $todir .= $main::outteams if ($1 eq $main::baseteams);
                        $todir .= $main::outerrors if ($1 eq $main::baseerrors);
                        $todir .= $main::outhome if ($1 eq $main::basehome);

                        $todir .= $2;
                    }

                    make_path($todir, { mode => 0751 }) if (! -d $todir );

                    say "$File::Find::name -> $todir";

                    copy($File::Find::name, $todir) or warn(q{copy failed:} . $!);
                }
            },
            $tmpdir
        );
    }
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Parse input command and enqueue them
|
2014-01-20 04:53:31 +00:00
|
|
|
# Parse input commands and enqueue them in the scheduler queue.
# $m: the Mirror; $cmds: colon-separated command string;
# $chan_output (optional): filehandle for feedback, defaults to STDOUT.
# Commands are kept ordered by descending length, which acts as a crude
# priority: shorter commands (e.g. "J", "S") jump ahead of longer ones.
sub parse($$;$)
{
    my $m = shift;
    my $change_current = 0;
    my $cmds = shift;
    my $chan_output = shift // \*STDOUT;

    for my $cmd ($cmds =~ /([^:]+)/g)
    {
        my $len = length($cmd);

        # Search the right position: first entry shorter than ours
        my $i;
        for ($i = 0; $i < $queue->pending(); $i++)
        {
            last if ($len > length($queue->peek($i)));
        }
        say $chan_output "Inserting $cmd at position $i/".$queue->pending();
        $queue->insert($i, $cmd);
        # Inserting at the head while something else was there means the
        # currently-running command has been preempted
        $change_current = 1 if $i == 0 && $queue->pending() != 1;
    }

    if ($change_current)
    {
        # Interrupt the running generation so the scheduler picks up the
        # new head of the queue
        say "Priority item have changed, stoping running threads";
        $m->end();
    }

    # Wake the scheduler (manage) blocked in cond_wait on the queue
    {
        lock($queue);
        cond_broadcast($queue);
    }

    #print Dumper($queue);
}
|
|
|
|
|
2014-01-19 13:51:18 +00:00
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
### SOCKETS ###########################################################
|
2014-01-19 16:49:07 +00:00
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Create the socket and wait for connection
|
2014-01-20 04:53:31 +00:00
|
|
|
# Create the UNIX control socket and wait for connections.
# $m: the Mirror passed to every connection handler; second argument is
# the socket path. Each accepted connection gets its own thread running
# socket_run(). This sub never returns under normal operation.
sub create_socket
{
    my $m = shift;
    my $socket_path = abs_path( shift );

    # Remove a stale socket file left behind by a previous run
    unlink($socket_path) if -e $socket_path;

    # BUGFIX: failure was previously unchecked, producing a confusing
    # "Can't call method accept on an undefined value" later on
    my $socket = IO::Socket::UNIX->new(
        Local => $socket_path,
        Type => SOCK_STREAM,
        Listen => SOMAXCONN,
    ) or die "Unable to create socket $socket_path: $!";
    say "Socket listening on $socket_path; waiting for connections...";

    while(my $connection = $socket->accept)
    {
        say "New connexion, new thread ready for parsing actions!";
        threads->create(\&socket_run, $m, $connection);
    }
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Manage the socket connection
|
2014-01-20 04:53:31 +00:00
|
|
|
# Handle one control-socket connection: read commands line by line and
# feed them to parse(), echoing feedback back through the connection.
# Runs in its own thread; returns when the peer closes the connection.
sub socket_run
{
    my ($mirror, $conn) = @_;

    $conn->autoflush(1);
    say $conn "You are connected to gen_site.pl, please enter command:";

    while (my $line = <$conn>)
    {
        chomp $line;
        parse($mirror, $line, $conn);
    }

    say "Closing socket connection; stopping thread.";
    close $conn;
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
|
|
|
|
### MAIN ################################################################

# Parse arguments
my $help;
GetOptions ("threads|thread|t=i" => \$threads,
            "baseadmin|ba=s" => \$baseadmin,
            "basehome|bh=s" => \$basehome,
            "baseerrors|be=s" => \$baseerrors,
            "baseteams|bt=s" => \$baseteams,
            "outdir|out|o=s" => \$outdir,
            "deamon|d" => \$deamon,
            "socket|s=s" => \$socket,
            "help|h|?" => \$help);

$outdir = abs_path($outdir);

# Daemon mode: run forever until stdin is closed (or a JOIN arrives)
if ($deamon)
{
    my $m :shared = Mirror->new();

    # The scheduler runs in its own thread, consuming the command queue
    $main_thread = threads->create(\&manage, $m);

    # Optional control socket for feeding commands at runtime
    threads->create(\&create_socket, $m, $socket) if ($socket);

    # First, enqueue any commands given on the command line
    while ($_ = shift) {
        parse($m, $_);
    }

    # Then read further commands from stdin until EOF
    while(<>)
    {
        chomp $_;
        parse($m, $_);
    }

    # Ask the scheduler to finish and wait for it
    parse($m, "J");
    $main_thread->join();
}
# Run given commands
elsif (@ARGV)
{
    my $m :shared = Mirror->new();

    $main_thread = threads->create(\&manage, $m);

    threads->create(\&create_socket, $m, $socket) if ($socket);

    # Enqueue every command-line argument, then stop
    while ($_ = shift) {
        parse($m, $_);
    }

    parse($m, "J");
    $main_thread->join();
}
# Just performs a full generation
else
{
    my $m = genFull();
    genHome($m);

    $m->join();

    # Full (destructive) synchronization of the output directory
    sync(1);
}

# Clean tmp
remove_tree($main::tmpdir);
|
|
|
|
|
2014-01-19 13:51:18 +00:00
|
|
|
|
|
|
|
package Mirror;
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Structure to store a mirror state of a website
|
|
|
|
|
2014-01-19 13:51:18 +00:00
|
|
|
use v5.10.1;
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
use threads;
|
|
|
|
use threads::shared;
|
|
|
|
|
|
|
|
use Thread::Queue;
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Initialize the class
|
2014-01-19 13:51:18 +00:00
|
|
|
# Initialize the class: build a shared Mirror structure (worker thread
# ids plus a stop flag) and start its worker threads immediately.
sub new($)
{
    my ($class) = @_;

    # Shared so every worker thread sees the same state
    my $self :shared = shared_clone({
        threads => [],
        stop => 0,
    });

    bless $self, $class;

    # Spawn the deduplication thread and the fetch workers right away
    $self->start();

    return $self;
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Launch threads and do more initialization
|
2014-01-19 13:51:18 +00:00
|
|
|
# Launch threads and do more initialization.
# Creates the two work queues, resets the idle/interrupt counters, then
# spawns one deduplication thread (run_add) and $main::threads fetcher
# threads (run). Thread ids are recorded in {threads}.
sub start($)
{
    my $self = shift;

    # {add} is seeded with the #RESET sentinel so run_add starts with a
    # clean seen-URLs memory
    $self->{add} = Thread::Queue->new("#RESET");
    $self->{todo} = Thread::Queue->new();
    $self->{waiting} = 0;   # number of currently-idle worker threads
    $self->{end} = 0;       # interruption flag for the current command

    push @{ $self->{threads} }, threads->create(\&run_add, $self)->tid();

    for (my $i = 0; $i < $main::threads; $i++) {
        push @{ $self->{threads} }, threads->create(\&run, $self, $i + 1)->tid();
    }
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Enqueue a RESET
|
2014-01-19 13:51:18 +00:00
|
|
|
# Enqueue a RESET: push the sentinel value that makes the
# deduplication thread (run_add) forget every URL already seen.
sub reset($)
{
    my ($self) = @_;

    $self->{add}->enqueue("#RESET");
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Clean queues and wait for stop threads
|
2014-01-19 13:51:18 +00:00
|
|
|
# Clean queues and wait for threads to become idle: raise the {end}
# interruption flag, drain both queues, clear the seen-URLs memory,
# wake every thread blocked on the object, then delegate to join().
# NOTE(review): the non-blocking drain loops stop at the first false
# item ("0" or ""), not only on an empty queue -- assumes queued URLs
# are never false; confirm.
sub end($)
{
    my $self = shift;

    $self->{end} = 1;

    # Discard all pending work
    while($self->{add}->dequeue_nb()) { }
    while($self->{todo}->dequeue_nb()) { }

    # Forget already-seen URLs so a retried command re-fetches them
    $self->reset();

    # Wake threads blocked in cond_wait on the object
    {
        lock($self);

        cond_broadcast($self);
    }

    return $self->join();
}
|
|
|
|
|
2014-01-19 13:51:18 +00:00
|
|
|
# Definitive shutdown: raise the {stop} flag so the worker loops exit,
# then flush the queues and wait for every thread via end().
sub stop($)
{
    my ($self) = @_;

    $self->{stop} = 1;

    return $self->end();
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Enqueue URLs
|
2014-01-19 13:51:18 +00:00
|
|
|
# Enqueue one or more URLs for deduplication and fetching.
# Preserves the historical behavior of the `while (shift)` form: the
# first false value (0, "", undef) ends the list.
sub add_url($@)
{
    my $self = shift;

    for my $url (@_)
    {
        last unless $url;
        $self->{add}->enqueue($url);
    }
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Function executed to prepare URL to fetch
|
2014-01-19 13:51:18 +00:00
|
|
|
# Deduplication thread body: drain the {add} queue, skip URLs already
# seen, and forward new ones to the {todo} queue consumed by the
# fetchers. The "#RESET" sentinel clears the seen-URLs memory.
sub run_add
{
    my $self = shift;
    my $id = shift // 0;    # informational thread id (unused here)

    # URLs already forwarded since the last #RESET
    my %urlseen;

    while (! $self->{stop})
    {
        my $link = $self->{add}->dequeue_nb();

        if (! defined $link)
        {
            # Queue empty: publish that we are idle ({waiting} is
            # watched by join()), then block on the queue
            {
                lock($self);
                $self->{waiting} += 1;
                cond_broadcast($self);
            }
            $link = $self->{add}->dequeue();
            # An undef item is the shutdown signal
            last if (! defined $link);
            {
                lock($self);
                $self->{waiting} -= 1;
            }
        }

        next if (exists $urlseen{$link});

        if ($link eq "#RESET")
        {
            # Forget everything: subsequent URLs will be re-fetched
            %urlseen = ();
        }
        else
        {
            $self->{todo}->enqueue($link);
            $urlseen{$link} = 1;
        }
    }
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Main function to fetch pages
|
2014-01-19 13:51:18 +00:00
|
|
|
# Fetcher thread body: take URLs from {todo}, fetch each page, rewrite
# its links, save it under the temporary mirror, and feed newly
# discovered nearby links back through add_url() for deduplication.
# $id is a small integer used only in log output.
sub run
{
    my $self = shift;
    my $id = shift // 0;

    while (! $self->{stop})
    {
        my $url = $self->{todo}->dequeue_nb();

        if (! defined $url)
        {
            # Queue empty: publish that we are idle ({waiting} is
            # watched by join()), then block on the queue
            {
                lock($self);
                $self->{waiting} += 1;
                cond_broadcast($self);
            }
            $url = $self->{todo}->dequeue();
            # An undef item is the shutdown signal
            last if (! defined $url);
            {
                lock($self);
                $self->{waiting} -= 1;
            }
        }

        say "[$id] $baseurl$url";

        my $p = FicPage->new($baseurl . $url);
        $p->fetch();

        # URLs without a trailing slash may be shortened forms; expand
        $p->toRightURL() if ($url !~ /\/$/);

        # BUGFIX: this was `my @links = $p->getNearLinks() if ...`;
        # `my` with a statement modifier has undefined behavior (see
        # perlsyn). Declare unconditionally, assign conditionally: when
        # an interruption is pending, no new links are collected.
        my @links;
        @links = $p->getNearLinks() if ! $self->{end};

        $p->treatLinks();
        $p->save();

        for my $link (@links)
        {
            # NOTE(review): `/^?\Q$url\E/` quantifies the `^` anchor and
            # is probably a typo for `/^\Q$url\E/`; also `&&` binds
            # tighter than `||` here. Both kept as-is to preserve
            # behavior -- confirm intent before changing.
            $self->add_url($link) if ($link ne $url && ($link =~ /^?\Q$url\E/) || $p->{url} =~ /\.css$/);
        }
    }
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Join
|
2014-01-19 13:51:18 +00:00
|
|
|
# Wait until every worker thread of this Mirror ($main::threads
# fetchers plus the deduplication thread) is idle.
# Returns true when the wait ended normally with workers still usable,
# false when the mirror was interrupted ({end}) or nothing was waiting.
sub join($)
{
    my $self = shift;

    # Everyone already idle: small grace period in case a fetched page
    # is about to enqueue more links
    sleep 1 if ($self->{waiting} == $main::threads + 1);

    return ($self->{waiting} > 0 && ! $self->{end}) if ($self->{waiting} == $main::threads + 1);

    # Otherwise block until all workers report idle, or the counter
    # goes negative, or a definitive stop is requested.
    # NOTE(review): {waiting} < 0 should not normally occur -- it looks
    # like a safety escape from the wait; confirm.
    {
        lock($self);
        cond_wait($self) until ($self->{waiting} >= $main::threads + 1 || $self->{waiting} < 0 || $self->{stop});
    }

    return ($self->{waiting} > 0 && ! $self->{end});
}
|
|
|
|
|
|
|
|
|
2014-01-19 04:32:13 +00:00
|
|
|
package FicPage;
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Represent a web page in order to perform some treatment on it
|
|
|
|
|
2014-01-19 04:32:13 +00:00
|
|
|
use v5.10.1;
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
|
|
|
|
use File::Basename;
|
|
|
|
use File::Path qw(make_path);
|
|
|
|
use HTTP::Request::Common qw(GET POST);
|
|
|
|
use LWP::UserAgent;
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Initialize the class
|
2014-01-19 04:32:13 +00:00
|
|
|
# Initialize the class: build a FicPage wrapping the given URL.
# Only the URL is stored up front; {content} appears after fetch().
sub new
{
    my ($class, $url) = @_;

    my $self = { url => $url };

    return bless $self, $class;
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Fetch the page content
|
|
|
|
# Fetch the page content over HTTP and store the raw body in {content}.
# NOTE(review): the HTTP status is not checked -- on failure {content}
# holds the error response body; confirm this is intended.
sub fetch($)
{
    my $self = shift;
    my $ua = LWP::UserAgent->new;

    my $res = $ua->request(GET $self->{url});

    $self->{content} = $res->content;
}
|
|
|
|
|
|
|
|
# If the URL store is a short one, try to expand it by looking to correct URL
|
2014-01-19 04:32:13 +00:00
|
|
|
# If the URL stored is a short (prefix) one, try to expand it by
# searching the links of the fetched page for the canonical URL.
# Updates {url} in place and returns it (changed or not).
sub toRightURL($)
{
    my $self = shift;

    my $search = $self->{url};
    $search =~ s!/$!!;    # drop a trailing slash before prefix-matching

    for my $url ($self->getLinks())
    {
        # Page links are site-relative; make them absolute to compare
        $url = $baseurl . $url;
        if ($url =~ /^\Q$search\E/)
        {
            # Keep the matching prefix up to the next path separator,
            # normalized with a trailing slash
            $url =~ s!^(\Q$search\E[^/]*).*$!$1/!;

            $self->{url} = $url;
            return $url;
        }
    }

    return $self->{url};
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Retrieve all links in the page content
|
2014-01-19 04:32:13 +00:00
|
|
|
# Retrieve all links in the page content: every URL referenced by a
# src=, href= or action= attribute, in document order.
sub getLinks($)
{
    my ($self) = @_;

    my $content = $self->{content};

    return $content =~ /(?:src|href|action)="([^"]+)"/g;
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Search relative (and absolute linking to the site part) links
|
2014-01-19 04:32:13 +00:00
|
|
|
# Search relative (and absolute linking to the site part) links.
# CSS files are scanned for url(...) references; HTML pages for
# src/href/action attributes. The site prefix ($main::baseurl) and any
# leading "../" sequences are stripped from the captured path; paths
# containing ":" (foreign schemes) are excluded by the character class.
sub getNearLinks($)
{
    my $self = shift;

    if ($self->{url} =~ /\.css$/)
    {
        return $self->{content} =~ /url\(["']?(?:\Q$main::baseurl\E|(?:\/?\.\.)+)?([^:)"'?#]+)/g;
    }
    else
    {
        my @links = $self->{content} =~ /(?:src|href)="(?:\Q$main::baseurl\E|(?:\/?\.\.)+)?([^:"]+)"/g;

        # For every form target, also visit its error variants.
        # NOTE(review): "/gerr" and "/serr" look like app-specific
        # error pages of the PHP part -- confirm against that code.
        for my $action ($self->{content} =~ /action="(?:\Q$main::baseurl\E|(?:\/?\.\.)+)?([^:"]+)"/g)
        {
            push @links, $action;
            push @links, "$action/gerr";
            push @links, "$action/serr";
        }

        return @links;
    }
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Remove SALT base from URL contained in the page
|
2014-01-19 04:32:13 +00:00
|
|
|
# Remove the site base from URLs contained in the page: strip the
# per-team prefix (teams area), the admin prefix or the home prefix
# from src/href/action attributes, leaving root-relative links.
sub treatLinks($)
{
    my $self = shift;

    $self->{content} =~ s!(src|href|action)="( \Q$baseteams\E[^/]+/ | \Q$baseadmin\E | \Q$basehome\E)([^"]*)"!$1="/$3"!gx;
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Generate the path where saved the page content
|
|
|
|
# Generate the on-disk path where the page content should be saved.
# Optional $basedir overrides the default temporary mirror directory.
sub getSavePath($;$)
{
    my ($self, $basedir) = @_;
    $basedir //= $main::tmpdir;

    # Strip the site prefix: what remains is the path relative to the
    # mirror root
    (my $rel = $self->{url}) =~ s/^\Q$main::baseurl\E//;

    return $basedir . $rel;
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Is the page already saved?
|
2014-01-19 04:32:13 +00:00
|
|
|
# Is the page already saved? True when its save path exists either as a
# plain file or as a directory containing an index.html.
sub alreadySaved($;$)
{
    my $self = shift;

    my $path = $self->getSavePath(@_);

    return 1 if -f $path;
    return ( -d $path && -f "$path/index.html" );
}
|
|
|
|
|
2014-11-20 14:39:54 +00:00
|
|
|
# Really save the page content
|
2014-01-19 04:32:13 +00:00
|
|
|
# Really save the page content. Paths ending in a 2-4 character dotted
# extension are treated as files and written directly; anything else is
# treated as a directory and saved as its index.html. Errors are
# reported on stdout rather than aborting the fetcher thread.
sub save($;$)
{
    my $self = shift;

    # Convert URL to real directory path
    my $path = $self->getSavePath(@_);

    eval
    {
        if ($path =~ /\.[a-z0-9]{2,4}$/)
        {
            # Best-effort directory creation (it may already exist or
            # be created concurrently by another fetcher thread)
            eval {
                make_path( dirname("$path") , { mode => 0751 }) if (! -d dirname("$path") );
            };

            open my $fd, ">", $path or die "$path: $!";
            print $fd $self->{content};
            close $fd;
        }
        else
        {
            eval {
                make_path "$path", { mode => 0751 } if (! -d "$path");
            };

            open my $fd, ">", "$path/index.html" or die "$path: $!";
            print $fd $self->{content};
            close $fd;
        }
    };
    # Report, but do not propagate, any write failure
    print $@ if ($@);
}
|
2014-11-20 14:39:54 +00:00
|
|
|
|
|
|
|
__END__
|
|
|
|
|
|
|
|
=head1 NAME
|
|
|
|
|
2014-11-23 15:02:45 +00:00
|
|
|
FIC parallel WGET
|
2014-11-20 14:39:54 +00:00
|
|
|
|
|
|
|
=head1 USAGE
|
|
|
|
|
|
|
|
./gen_site.pl [OPTIONS] [COMMANDS]
|
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
|
|
|
|
TODO
|
|
|
|
|
|
|
|
=head1 OPTIONS
|
|
|
|
|
|
|
|
=over
|
|
|
|
|
|
|
|
=item B<-baseadmin=string>
|
|
|
|
|
|
|
|
Called SALT_ADMIN in PHP part. Default: C</admin/>
|
|
|
|
|
|
|
|
=item B<-baseerrors=string>
|
|
|
|
|
|
|
|
There is no equivalent in the PHP part; it is hardcoded. Default: C</errors/>
|
|
|
|
|
|
|
|
=item B<-basehome=string>
|
|
|
|
|
|
|
|
Called SALT_PUBLIC in PHP part. Default: C</>
|
|
|
|
|
|
|
|
=item B<-baseteams=string>
|
|
|
|
|
|
|
|
Called SALT_USER in PHP part. Default: C</connected/>
|
|
|
|
|
|
|
|
=item B<-deamon>
|
|
|
|
|
|
|
|
Run forever (until JOIN scheduler instruction occurs).
|
|
|
|
|
|
|
|
=item B<-help>
|
|
|
|
|
|
|
|
Displays the help.
|
|
|
|
|
|
|
|
=item B<-outdir=path>
|
|
|
|
|
|
|
|
Path where save generated pages
|
|
|
|
|
|
|
|
=item B<-socket=path>
|
|
|
|
|
|
|
|
Path to the socket to create.
|
|
|
|
|
|
|
|
=item B<-threads=int>
|
|
|
|
|
|
|
|
Number of parallel fetch to perform.
|
|
|
|
|
|
|
|
=back
|
|
|
|
|
|
|
|
=head1 SCHEDULER COMMANDS
|
|
|
|
|
|
|
|
=over
|
|
|
|
|
|
|
|
=item B<all>
|
|
|
|
|
|
|
|
Generate pages for all teams.
|
|
|
|
|
|
|
|
=item B<HOME>
|
|
|
|
|
|
|
|
Generate pages for the public part.
|
|
|
|
|
|
|
|
=item B<ERRORS>
|
|
|
|
|
|
|
|
Generate errors pages.
|
|
|
|
|
|
|
|
=item B<TEAM00>
|
|
|
|
|
|
|
|
Generate all pages for the team C<00>.
|
|
|
|
|
|
|
|
=item B<TEAM00,11>
|
|
|
|
|
|
|
|
Generate pages for the C<11> theme for the team C<00>.
|
|
|
|
|
|
|
|
=item B<reset>
|
|
|
|
|
2014-11-23 15:02:45 +00:00
|
|
|
Clean the temporary directory.
|
2014-11-20 14:39:54 +00:00
|
|
|
|
|
|
|
=item B<DSYNC>
|
|
|
|
|
|
|
|
Make a full synchronization of the output directory (remove the existing output
directory, replace it with the current temporary content, then reset the
temporary content).
|
|
|
|
|
|
|
|
=item B<SYNC>
|
|
|
|
|
|
|
|
Make an incremental synchronization with the output directory (just copy all
files from the current temporary content to the output directory, without
removing anything).
|
|
|
|
|
|
|
|
=item B<LS>
|
|
|
|
|
|
|
|
Perform a C<ls> in the temporary directory content.
|
|
|
|
|
|
|
|
=item B<JOIN>
|
|
|
|
|
2014-11-23 15:02:45 +00:00
|
|
|
Flush the scheduler queue and wait for all jobs done.
|
|
|
|
|
|
|
|
=item B<RTEAMS>
|
|
|
|
|
|
|
|
Regenerate the nginx file containing teams IDs.
|
2014-11-20 14:39:54 +00:00
|
|
|
|
|
|
|
=item B<help>
|
|
|
|
|
|
|
|
Display some help.
|
|
|
|
|
|
|
|
=back
|
|
|
|
|
|
|
|
=head1 DEPENDENCIES
|
|
|
|
|
|
|
|
=over
|
|
|
|
|
|
|
|
=item
|
|
|
|
|
|
|
|
perl >= 5.10.1, compilated with threads support
|
|
|
|
|
|
|
|
=item
|
|
|
|
|
|
|
|
HTTP::Request::Common
|
|
|
|
|
|
|
|
=back
|
|
|
|
|
|
|
|
=head1 AUTHOR
|
|
|
|
|
|
|
|
Pierre-Olivier Mercier <nemunaire@epita.fr>
|
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT
|
|
|
|
|
|
|
|
Copyright (c) 2014 Pierre-Olivier Mercier
|