Document gen_site.pl
This commit is contained in:
parent
ec69cea4f6
commit
4fe56db0d6
1 changed files with 226 additions and 32 deletions
258
gen_site.pl
258
gen_site.pl
|
@ -1,4 +1,14 @@
|
||||||
#!/usr/bin/env perl
|
#!/usr/bin/env perl
|
||||||
|
#=============================================================================
|
||||||
|
#
|
||||||
|
# USAGE: ./gen_site.pl [options] [commands]
|
||||||
|
#
|
||||||
|
# DESCRIPTION: More efficient wget -m
|
||||||
|
#
|
||||||
|
# AUTHOR: Pierre-Olivier Mercier <nemunaire@epita.fr>
|
||||||
|
# ORGANIZATION: EPITA SRS
|
||||||
|
#
|
||||||
|
#=============================================================================
|
||||||
|
|
||||||
use v5.10.1;
|
use v5.10.1;
|
||||||
use strict;
|
use strict;
|
||||||
|
@ -16,6 +26,8 @@ use Getopt::Long;
|
||||||
use IO::Socket;
|
use IO::Socket;
|
||||||
use Thread::Queue;
|
use Thread::Queue;
|
||||||
|
|
||||||
|
### GLOBALS ###########################################################
|
||||||
|
|
||||||
our $outdir = "outest";
|
our $outdir = "outest";
|
||||||
our $outteams = "/teams/";
|
our $outteams = "/teams/";
|
||||||
our $outerrors = "/errors/";
|
our $outerrors = "/errors/";
|
||||||
|
@ -29,6 +41,16 @@ our $baseerrors = "/errors/";
|
||||||
our $baseteams = "/connected/";
|
our $baseteams = "/connected/";
|
||||||
our $threads = 6;
|
our $threads = 6;
|
||||||
|
|
||||||
|
my $deamon;
|
||||||
|
my $socket;
|
||||||
|
|
||||||
|
my $queue :shared = Thread::Queue->new();
|
||||||
|
my $main_thread;
|
||||||
|
|
||||||
|
|
||||||
|
### GENERATORS ########################################################
|
||||||
|
|
||||||
|
# Enqueue error pages in the mirror
|
||||||
sub genErrors(;$)
|
sub genErrors(;$)
|
||||||
{
|
{
|
||||||
my $m = shift // Mirror->new();
|
my $m = shift // Mirror->new();
|
||||||
|
@ -43,6 +65,7 @@ sub genErrors(;$)
|
||||||
return $m;
|
return $m;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Enqueue public pages in the mirror
|
||||||
sub genHome(;$)
|
sub genHome(;$)
|
||||||
{
|
{
|
||||||
my $m = shift // Mirror->new();
|
my $m = shift // Mirror->new();
|
||||||
|
@ -54,6 +77,7 @@ sub genHome(;$)
|
||||||
return $m;
|
return $m;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Enqueue all teams in the mirror
|
||||||
sub genFull(;$)
|
sub genFull(;$)
|
||||||
{
|
{
|
||||||
my $m = shift // Mirror->new();
|
my $m = shift // Mirror->new();
|
||||||
|
@ -62,6 +86,7 @@ sub genFull(;$)
|
||||||
return $m;
|
return $m;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Enqueue a team in the mirror
|
||||||
sub genTeam($;$)
|
sub genTeam($;$)
|
||||||
{
|
{
|
||||||
my $team_id = shift;
|
my $team_id = shift;
|
||||||
|
@ -72,6 +97,7 @@ sub genTeam($;$)
|
||||||
return $m;
|
return $m;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Enqueue theme pages for a given team in the mirror
|
||||||
sub genTeamTheme($$;$)
|
sub genTeamTheme($$;$)
|
||||||
{
|
{
|
||||||
my $team_id = shift;
|
my $team_id = shift;
|
||||||
|
@ -84,9 +110,9 @@ sub genTeamTheme($$;$)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
my $queue :shared = Thread::Queue->new();
|
### TOOLS #############################################################
|
||||||
my $main_thread;
|
|
||||||
|
|
||||||
|
# Manage the mirror
|
||||||
sub manage
|
sub manage
|
||||||
{
|
{
|
||||||
my $m = shift // Mirror->new();
|
my $m = shift // Mirror->new();
|
||||||
|
@ -187,6 +213,7 @@ sub manage
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Perform a synchronization of the temporary mirrored directory
|
||||||
sub sync
|
sub sync
|
||||||
{
|
{
|
||||||
if (shift)
|
if (shift)
|
||||||
|
@ -261,6 +288,7 @@ sub sync
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Parse input command and enqueue them
|
||||||
sub parse($$;$)
|
sub parse($$;$)
|
||||||
{
|
{
|
||||||
my $m = shift;
|
my $m = shift;
|
||||||
|
@ -298,19 +326,9 @@ sub parse($$;$)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# Parse arguments
|
### SOCKETS ###########################################################
|
||||||
my $help; my $deamon; my $socket;
|
|
||||||
GetOptions ("threads|thread|t=i" => \$threads,
|
|
||||||
"baseadmin|ba=s" => \$baseadmin,
|
|
||||||
"basehome|bh=s" => \$basehome,
|
|
||||||
"baseteams|bt=s" => \$baseteams,
|
|
||||||
"outdir|out|o=s" => \$outdir,
|
|
||||||
"deamon|d" => \$deamon,
|
|
||||||
"socket|s=s" => \$socket,
|
|
||||||
"help|h|?" => \$help);
|
|
||||||
|
|
||||||
$outdir = abs_path($outdir);
|
|
||||||
|
|
||||||
|
# Create the socket and wait for connection
|
||||||
sub create_socket
|
sub create_socket
|
||||||
{
|
{
|
||||||
my $m = shift;
|
my $m = shift;
|
||||||
|
@ -332,6 +350,7 @@ sub create_socket
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Manage the socket connection
|
||||||
sub socket_run
|
sub socket_run
|
||||||
{
|
{
|
||||||
my $m = shift;
|
my $m = shift;
|
||||||
|
@ -348,6 +367,24 @@ sub socket_run
|
||||||
close $connection;
|
close $connection;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
### MAIN ################################################################
|
||||||
|
|
||||||
|
# Parse arguments
|
||||||
|
my $help;
|
||||||
|
GetOptions ("threads|thread|t=i" => \$threads,
|
||||||
|
"baseadmin|ba=s" => \$baseadmin,
|
||||||
|
"basehome|bh=s" => \$basehome,
|
||||||
|
"baseerrors|be=s" => \$baseerrors,
|
||||||
|
"baseteams|bt=s" => \$baseteams,
|
||||||
|
"outdir|out|o=s" => \$outdir,
|
||||||
|
"deamon|d" => \$deamon,
|
||||||
|
"socket|s=s" => \$socket,
|
||||||
|
"help|h|?" => \$help);
|
||||||
|
|
||||||
|
$outdir = abs_path($outdir);
|
||||||
|
|
||||||
|
# Daemon mode: run forever until stdin is open
|
||||||
if ($deamon)
|
if ($deamon)
|
||||||
{
|
{
|
||||||
my $m :shared = Mirror->new();
|
my $m :shared = Mirror->new();
|
||||||
|
@ -365,6 +402,8 @@ if ($deamon)
|
||||||
parse($m, "J");
|
parse($m, "J");
|
||||||
$main_thread->join();
|
$main_thread->join();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Run given commands
|
||||||
elsif (@ARGV)
|
elsif (@ARGV)
|
||||||
{
|
{
|
||||||
my $m :shared = Mirror->new();
|
my $m :shared = Mirror->new();
|
||||||
|
@ -380,6 +419,8 @@ elsif (@ARGV)
|
||||||
parse($m, "J");
|
parse($m, "J");
|
||||||
$main_thread->join();
|
$main_thread->join();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Just performs a full generation
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
my $m = genFull();
|
my $m = genFull();
|
||||||
|
@ -390,11 +431,14 @@ else
|
||||||
sync(1);
|
sync(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Clean tmp
|
||||||
remove_tree($main::tmpdir);
|
remove_tree($main::tmpdir);
|
||||||
|
|
||||||
|
|
||||||
package Mirror;
|
package Mirror;
|
||||||
|
|
||||||
|
# Structure to store a mirror state of a website
|
||||||
|
|
||||||
use v5.10.1;
|
use v5.10.1;
|
||||||
use strict;
|
use strict;
|
||||||
use warnings;
|
use warnings;
|
||||||
|
@ -403,6 +447,7 @@ use threads::shared;
|
||||||
|
|
||||||
use Thread::Queue;
|
use Thread::Queue;
|
||||||
|
|
||||||
|
# Initialize the class
|
||||||
sub new($)
|
sub new($)
|
||||||
{
|
{
|
||||||
my $class = shift;
|
my $class = shift;
|
||||||
|
@ -418,6 +463,7 @@ sub new($)
|
||||||
return $self;
|
return $self;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Launch threads and do more initialization
|
||||||
sub start($)
|
sub start($)
|
||||||
{
|
{
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
|
@ -434,6 +480,7 @@ sub start($)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Enqueue a RESET
|
||||||
sub reset($)
|
sub reset($)
|
||||||
{
|
{
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
|
@ -441,6 +488,7 @@ sub reset($)
|
||||||
$self->{add}->enqueue("#RESET");
|
$self->{add}->enqueue("#RESET");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Clean queues and wait for stop threads
|
||||||
sub end($)
|
sub end($)
|
||||||
{
|
{
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
|
@ -470,6 +518,7 @@ sub stop($)
|
||||||
return $self->end();
|
return $self->end();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Enqueue URLs
|
||||||
sub add_url($@)
|
sub add_url($@)
|
||||||
{
|
{
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
|
@ -480,6 +529,7 @@ sub add_url($@)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Function executed to prepare URL to fetch
|
||||||
sub run_add
|
sub run_add
|
||||||
{
|
{
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
|
@ -520,6 +570,7 @@ sub run_add
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Main function to fetch pages
|
||||||
sub run
|
sub run
|
||||||
{
|
{
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
|
@ -563,6 +614,7 @@ sub run
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Join
|
||||||
sub join($)
|
sub join($)
|
||||||
{
|
{
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
|
@ -582,6 +634,8 @@ sub join($)
|
||||||
|
|
||||||
package FicPage;
|
package FicPage;
|
||||||
|
|
||||||
|
# Represent a web page in order to perform some treatment on it
|
||||||
|
|
||||||
use v5.10.1;
|
use v5.10.1;
|
||||||
use strict;
|
use strict;
|
||||||
use warnings;
|
use warnings;
|
||||||
|
@ -591,6 +645,7 @@ use File::Path qw(make_path);
|
||||||
use HTTP::Request::Common qw(GET POST);
|
use HTTP::Request::Common qw(GET POST);
|
||||||
use LWP::UserAgent;
|
use LWP::UserAgent;
|
||||||
|
|
||||||
|
# Initialize the class
|
||||||
sub new
|
sub new
|
||||||
{
|
{
|
||||||
my $class = shift;
|
my $class = shift;
|
||||||
|
@ -603,6 +658,18 @@ sub new
|
||||||
return $self;
|
return $self;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Fetch the page content
|
||||||
|
sub fetch($)
|
||||||
|
{
|
||||||
|
my $self = shift;
|
||||||
|
my $ua = LWP::UserAgent->new;
|
||||||
|
|
||||||
|
my $res = $ua->request(GET $self->{url});
|
||||||
|
|
||||||
|
$self->{content} = $res->content;
|
||||||
|
}
|
||||||
|
|
||||||
|
# If the URL store is a short one, try to expand it by looking to correct URL
|
||||||
sub toRightURL($)
|
sub toRightURL($)
|
||||||
{
|
{
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
|
@ -625,6 +692,7 @@ sub toRightURL($)
|
||||||
return $self->{url};
|
return $self->{url};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Retrieve all links in the page content
|
||||||
sub getLinks($)
|
sub getLinks($)
|
||||||
{
|
{
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
|
@ -632,6 +700,7 @@ sub getLinks($)
|
||||||
return $self->{content} =~ /(?:src|href|action)="([^"]+)"/g;
|
return $self->{content} =~ /(?:src|href|action)="([^"]+)"/g;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Search relative (and absolute linking to the site part) links
|
||||||
sub getNearLinks($)
|
sub getNearLinks($)
|
||||||
{
|
{
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
|
@ -655,6 +724,7 @@ sub getNearLinks($)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Remove SALT base from URL contained in the page
|
||||||
sub treatLinks($)
|
sub treatLinks($)
|
||||||
{
|
{
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
|
@ -662,24 +732,7 @@ sub treatLinks($)
|
||||||
$self->{content} =~ s!(src|href|action)="( \Q$baseteams\E[^/]+/ | \Q$baseadmin\E | \Q$basehome\E)([^"]*)"!$1="/$3"!gx;
|
$self->{content} =~ s!(src|href|action)="( \Q$baseteams\E[^/]+/ | \Q$baseadmin\E | \Q$basehome\E)([^"]*)"!$1="/$3"!gx;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub fetch($)
|
# Generate the path where saved the page content
|
||||||
{
|
|
||||||
my $self = shift;
|
|
||||||
my $ua = LWP::UserAgent->new;
|
|
||||||
|
|
||||||
my $res = $ua->request(GET $self->{url});
|
|
||||||
|
|
||||||
$self->{content} = $res->content;
|
|
||||||
}
|
|
||||||
|
|
||||||
sub alreadySaved($;$)
|
|
||||||
{
|
|
||||||
my $self = shift;
|
|
||||||
|
|
||||||
my $path = $self->getSavePath(@_);
|
|
||||||
return -f $path || ( -d $path && -f "$path/index.html" );
|
|
||||||
}
|
|
||||||
|
|
||||||
sub getSavePath($;$)
|
sub getSavePath($;$)
|
||||||
{
|
{
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
|
@ -692,6 +745,16 @@ sub getSavePath($;$)
|
||||||
return "$basedir$path";
|
return "$basedir$path";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Is the page already saved?
|
||||||
|
sub alreadySaved($;$)
|
||||||
|
{
|
||||||
|
my $self = shift;
|
||||||
|
|
||||||
|
my $path = $self->getSavePath(@_);
|
||||||
|
return -f $path || ( -d $path && -f "$path/index.html" );
|
||||||
|
}
|
||||||
|
|
||||||
|
# Really save the page content
|
||||||
sub save($;$)
|
sub save($;$)
|
||||||
{
|
{
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
|
@ -724,3 +787,134 @@ sub save($;$)
|
||||||
};
|
};
|
||||||
print $@ if ($@);
|
print $@ if ($@);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__END__
|
||||||
|
|
||||||
|
=head1 NAME
|
||||||
|
|
||||||
|
Dave Null - The netiquette's guardian angel
|
||||||
|
|
||||||
|
=head1 USAGE
|
||||||
|
|
||||||
|
./gen_site.pl [OPTIONS] [COMMANDS]
|
||||||
|
|
||||||
|
=head1 DESCRIPTION
|
||||||
|
|
||||||
|
TODO
|
||||||
|
|
||||||
|
=head1 OPTIONS
|
||||||
|
|
||||||
|
=over
|
||||||
|
|
||||||
|
=item B<-baseadmin=string>
|
||||||
|
|
||||||
|
Called SALT_ADMIN in PHP part. Default: C</admin/>
|
||||||
|
|
||||||
|
=item B<-baseerrors=string>
|
||||||
|
|
||||||
|
There is no equivalent in PHP part, it's harcoded. Default: C</errors/>
|
||||||
|
|
||||||
|
=item B<-basehome=string>
|
||||||
|
|
||||||
|
Called SALT_PUBLIC in PHP part. Default: C</>
|
||||||
|
|
||||||
|
=item B<-baseteams=string>
|
||||||
|
|
||||||
|
Called SALT_USER in PHP part. Default: C</connected/>
|
||||||
|
|
||||||
|
=item B<-deamon>
|
||||||
|
|
||||||
|
Run forever (until JOIN scheduler instruction occurs).
|
||||||
|
|
||||||
|
=item B<-help>
|
||||||
|
|
||||||
|
Displays the help.
|
||||||
|
|
||||||
|
=item B<-outdir=path>
|
||||||
|
|
||||||
|
Path where save generated pages
|
||||||
|
|
||||||
|
=item B<-socket=path>
|
||||||
|
|
||||||
|
Path to the socket to create.
|
||||||
|
|
||||||
|
=item B<-threads=int>
|
||||||
|
|
||||||
|
Number of parallel fetch to perform.
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head1 SCHEDULER COMMANDS
|
||||||
|
|
||||||
|
=over
|
||||||
|
|
||||||
|
=item B<all>
|
||||||
|
|
||||||
|
Generate pages for all teams.
|
||||||
|
|
||||||
|
=item B<HOME>
|
||||||
|
|
||||||
|
Generate pages for the public part.
|
||||||
|
|
||||||
|
=item B<ERRORS>
|
||||||
|
|
||||||
|
Generate errors pages.
|
||||||
|
|
||||||
|
=item B<TEAM00>
|
||||||
|
|
||||||
|
Generate all pages for the team C<00>.
|
||||||
|
|
||||||
|
=item B<TEAM00,11>
|
||||||
|
|
||||||
|
Generate pages for the C<11> theme for the team C<00>.
|
||||||
|
|
||||||
|
=item B<reset>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
=item B<DSYNC>
|
||||||
|
|
||||||
|
Made a full synchronization of the output directory (remove existing output dir
|
||||||
|
and replace it by the current temporary content and reset temporary content).
|
||||||
|
|
||||||
|
=item B<SYNC>
|
||||||
|
|
||||||
|
Made a incremental synchronization with the output directory (just copy all
|
||||||
|
files in the current temporary content to the output dir, don't remove
|
||||||
|
anything).
|
||||||
|
|
||||||
|
=item B<LS>
|
||||||
|
|
||||||
|
Perform a C<ls> in the temporary directory content.
|
||||||
|
|
||||||
|
=item B<JOIN>
|
||||||
|
|
||||||
|
Flush the scheduler queue and
|
||||||
|
|
||||||
|
=item B<help>
|
||||||
|
|
||||||
|
Display some help.
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head1 DEPENDENCIES
|
||||||
|
|
||||||
|
=over
|
||||||
|
|
||||||
|
=item
|
||||||
|
|
||||||
|
perl >= 5.10.1, compilated with threads support
|
||||||
|
|
||||||
|
=item
|
||||||
|
|
||||||
|
HTTP::Request::Common
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head1 AUTHOR
|
||||||
|
|
||||||
|
Pierre-Olivier Mercier <nemunaire@epita.fr>
|
||||||
|
|
||||||
|
=head1 LICENSE AND COPYRIGHT
|
||||||
|
|
||||||
|
Copyright (c) 2014 Pierre-Olivier Mercier
|
||||||
|
|
Reference in a new issue