Document gen_site.pl
This commit is contained in:
parent
ec69cea4f6
commit
4fe56db0d6
258
gen_site.pl
258
gen_site.pl
@ -1,4 +1,14 @@
|
||||
#!/usr/bin/env perl
|
||||
#=============================================================================
|
||||
#
|
||||
# USAGE: ./gen_site.pl [options] [commands]
|
||||
#
|
||||
# DESCRIPTION: More efficient wget -m
|
||||
#
|
||||
# AUTHOR: Pierre-Olivier Mercier <nemunaire@epita.fr>
|
||||
# ORGANIZATION: EPITA SRS
|
||||
#
|
||||
#=============================================================================
|
||||
|
||||
use v5.10.1;
|
||||
use strict;
|
||||
@ -16,6 +26,8 @@ use Getopt::Long;
|
||||
use IO::Socket;
|
||||
use Thread::Queue;
|
||||
|
||||
### GLOBALS ###########################################################
|
||||
|
||||
our $outdir = "outest";
|
||||
our $outteams = "/teams/";
|
||||
our $outerrors = "/errors/";
|
||||
@ -29,6 +41,16 @@ our $baseerrors = "/errors/";
|
||||
our $baseteams = "/connected/";
|
||||
our $threads = 6;
|
||||
|
||||
my $deamon;
|
||||
my $socket;
|
||||
|
||||
my $queue :shared = Thread::Queue->new();
|
||||
my $main_thread;
|
||||
|
||||
|
||||
### GENERATORS ########################################################
|
||||
|
||||
# Enqueue error pages in the mirror
|
||||
sub genErrors(;$)
|
||||
{
|
||||
my $m = shift // Mirror->new();
|
||||
@ -43,6 +65,7 @@ sub genErrors(;$)
|
||||
return $m;
|
||||
}
|
||||
|
||||
# Enqueue public pages in the mirror
|
||||
sub genHome(;$)
|
||||
{
|
||||
my $m = shift // Mirror->new();
|
||||
@ -54,6 +77,7 @@ sub genHome(;$)
|
||||
return $m;
|
||||
}
|
||||
|
||||
# Enqueue all teams in the mirror
|
||||
sub genFull(;$)
|
||||
{
|
||||
my $m = shift // Mirror->new();
|
||||
@ -62,6 +86,7 @@ sub genFull(;$)
|
||||
return $m;
|
||||
}
|
||||
|
||||
# Enqueue a team in the mirror
|
||||
sub genTeam($;$)
|
||||
{
|
||||
my $team_id = shift;
|
||||
@ -72,6 +97,7 @@ sub genTeam($;$)
|
||||
return $m;
|
||||
}
|
||||
|
||||
# Enqueue theme pages for a given team in the mirror
|
||||
sub genTeamTheme($$;$)
|
||||
{
|
||||
my $team_id = shift;
|
||||
@ -84,9 +110,9 @@ sub genTeamTheme($$;$)
|
||||
}
|
||||
|
||||
|
||||
my $queue :shared = Thread::Queue->new();
|
||||
my $main_thread;
|
||||
### TOOLS #############################################################
|
||||
|
||||
# Manage the mirror
|
||||
sub manage
|
||||
{
|
||||
my $m = shift // Mirror->new();
|
||||
@ -187,6 +213,7 @@ sub manage
|
||||
}
|
||||
}
|
||||
|
||||
# Perform a synchronization of the temporary mirrored directory
|
||||
sub sync
|
||||
{
|
||||
if (shift)
|
||||
@ -261,6 +288,7 @@ sub sync
|
||||
}
|
||||
}
|
||||
|
||||
# Parse input command and enqueue them
|
||||
sub parse($$;$)
|
||||
{
|
||||
my $m = shift;
|
||||
@ -298,19 +326,9 @@ sub parse($$;$)
|
||||
}
|
||||
|
||||
|
||||
# Parse arguments
|
||||
my $help; my $deamon; my $socket;
|
||||
GetOptions ("threads|thread|t=i" => \$threads,
|
||||
"baseadmin|ba=s" => \$baseadmin,
|
||||
"basehome|bh=s" => \$basehome,
|
||||
"baseteams|bt=s" => \$baseteams,
|
||||
"outdir|out|o=s" => \$outdir,
|
||||
"deamon|d" => \$deamon,
|
||||
"socket|s=s" => \$socket,
|
||||
"help|h|?" => \$help);
|
||||
|
||||
$outdir = abs_path($outdir);
|
||||
### SOCKETS ###########################################################
|
||||
|
||||
# Create the socket and wait for connection
|
||||
sub create_socket
|
||||
{
|
||||
my $m = shift;
|
||||
@ -332,6 +350,7 @@ sub create_socket
|
||||
}
|
||||
}
|
||||
|
||||
# Manage the socket connection
|
||||
sub socket_run
|
||||
{
|
||||
my $m = shift;
|
||||
@ -348,6 +367,24 @@ sub socket_run
|
||||
close $connection;
|
||||
}
|
||||
|
||||
|
||||
### MAIN ################################################################
|
||||
|
||||
# Parse arguments
|
||||
my $help;
|
||||
GetOptions ("threads|thread|t=i" => \$threads,
|
||||
"baseadmin|ba=s" => \$baseadmin,
|
||||
"basehome|bh=s" => \$basehome,
|
||||
"baseerrors|be=s" => \$baseerrors,
|
||||
"baseteams|bt=s" => \$baseteams,
|
||||
"outdir|out|o=s" => \$outdir,
|
||||
"deamon|d" => \$deamon,
|
||||
"socket|s=s" => \$socket,
|
||||
"help|h|?" => \$help);
|
||||
|
||||
$outdir = abs_path($outdir);
|
||||
|
||||
# Daemon mode: run forever until stdin is open
|
||||
if ($deamon)
|
||||
{
|
||||
my $m :shared = Mirror->new();
|
||||
@ -365,6 +402,8 @@ if ($deamon)
|
||||
parse($m, "J");
|
||||
$main_thread->join();
|
||||
}
|
||||
|
||||
# Run given commands
|
||||
elsif (@ARGV)
|
||||
{
|
||||
my $m :shared = Mirror->new();
|
||||
@ -380,6 +419,8 @@ elsif (@ARGV)
|
||||
parse($m, "J");
|
||||
$main_thread->join();
|
||||
}
|
||||
|
||||
# Just performs a full generation
|
||||
else
|
||||
{
|
||||
my $m = genFull();
|
||||
@ -390,11 +431,14 @@ else
|
||||
sync(1);
|
||||
}
|
||||
|
||||
# Clean tmp
|
||||
remove_tree($main::tmpdir);
|
||||
|
||||
|
||||
package Mirror;
|
||||
|
||||
# Structure to store a mirror state of a website
|
||||
|
||||
use v5.10.1;
|
||||
use strict;
|
||||
use warnings;
|
||||
@ -403,6 +447,7 @@ use threads::shared;
|
||||
|
||||
use Thread::Queue;
|
||||
|
||||
# Initialize the class
|
||||
sub new($)
|
||||
{
|
||||
my $class = shift;
|
||||
@ -418,6 +463,7 @@ sub new($)
|
||||
return $self;
|
||||
}
|
||||
|
||||
# Launch threads and do more initialization
|
||||
sub start($)
|
||||
{
|
||||
my $self = shift;
|
||||
@ -434,6 +480,7 @@ sub start($)
|
||||
}
|
||||
}
|
||||
|
||||
# Enqueue a RESET
|
||||
sub reset($)
|
||||
{
|
||||
my $self = shift;
|
||||
@ -441,6 +488,7 @@ sub reset($)
|
||||
$self->{add}->enqueue("#RESET");
|
||||
}
|
||||
|
||||
# Clean queues and wait for stop threads
|
||||
sub end($)
|
||||
{
|
||||
my $self = shift;
|
||||
@ -470,6 +518,7 @@ sub stop($)
|
||||
return $self->end();
|
||||
}
|
||||
|
||||
# Enqueue URLs
|
||||
sub add_url($@)
|
||||
{
|
||||
my $self = shift;
|
||||
@ -480,6 +529,7 @@ sub add_url($@)
|
||||
}
|
||||
}
|
||||
|
||||
# Function executed to prepare URL to fetch
|
||||
sub run_add
|
||||
{
|
||||
my $self = shift;
|
||||
@ -520,6 +570,7 @@ sub run_add
|
||||
}
|
||||
}
|
||||
|
||||
# Main function to fetch pages
|
||||
sub run
|
||||
{
|
||||
my $self = shift;
|
||||
@ -563,6 +614,7 @@ sub run
|
||||
}
|
||||
}
|
||||
|
||||
# Join
|
||||
sub join($)
|
||||
{
|
||||
my $self = shift;
|
||||
@ -582,6 +634,8 @@ sub join($)
|
||||
|
||||
package FicPage;
|
||||
|
||||
# Represent a web page in order to perform some treatment on it
|
||||
|
||||
use v5.10.1;
|
||||
use strict;
|
||||
use warnings;
|
||||
@ -591,6 +645,7 @@ use File::Path qw(make_path);
|
||||
use HTTP::Request::Common qw(GET POST);
|
||||
use LWP::UserAgent;
|
||||
|
||||
# Initialize the class
|
||||
sub new
|
||||
{
|
||||
my $class = shift;
|
||||
@ -603,6 +658,18 @@ sub new
|
||||
return $self;
|
||||
}
|
||||
|
||||
# Fetch the page content
|
||||
sub fetch($)
|
||||
{
|
||||
my $self = shift;
|
||||
my $ua = LWP::UserAgent->new;
|
||||
|
||||
my $res = $ua->request(GET $self->{url});
|
||||
|
||||
$self->{content} = $res->content;
|
||||
}
|
||||
|
||||
# If the URL store is a short one, try to expand it by looking to correct URL
|
||||
sub toRightURL($)
|
||||
{
|
||||
my $self = shift;
|
||||
@ -625,6 +692,7 @@ sub toRightURL($)
|
||||
return $self->{url};
|
||||
}
|
||||
|
||||
# Retrieve all links in the page content
|
||||
sub getLinks($)
|
||||
{
|
||||
my $self = shift;
|
||||
@ -632,6 +700,7 @@ sub getLinks($)
|
||||
return $self->{content} =~ /(?:src|href|action)="([^"]+)"/g;
|
||||
}
|
||||
|
||||
# Search relative (and absolute linking to the site part) links
|
||||
sub getNearLinks($)
|
||||
{
|
||||
my $self = shift;
|
||||
@ -655,6 +724,7 @@ sub getNearLinks($)
|
||||
}
|
||||
}
|
||||
|
||||
# Remove SALT base from URL contained in the page
|
||||
sub treatLinks($)
|
||||
{
|
||||
my $self = shift;
|
||||
@ -662,24 +732,7 @@ sub treatLinks($)
|
||||
$self->{content} =~ s!(src|href|action)="( \Q$baseteams\E[^/]+/ | \Q$baseadmin\E | \Q$basehome\E)([^"]*)"!$1="/$3"!gx;
|
||||
}
|
||||
|
||||
sub fetch($)
|
||||
{
|
||||
my $self = shift;
|
||||
my $ua = LWP::UserAgent->new;
|
||||
|
||||
my $res = $ua->request(GET $self->{url});
|
||||
|
||||
$self->{content} = $res->content;
|
||||
}
|
||||
|
||||
sub alreadySaved($;$)
|
||||
{
|
||||
my $self = shift;
|
||||
|
||||
my $path = $self->getSavePath(@_);
|
||||
return -f $path || ( -d $path && -f "$path/index.html" );
|
||||
}
|
||||
|
||||
# Generate the path where saved the page content
|
||||
sub getSavePath($;$)
|
||||
{
|
||||
my $self = shift;
|
||||
@ -692,6 +745,16 @@ sub getSavePath($;$)
|
||||
return "$basedir$path";
|
||||
}
|
||||
|
||||
# Is the page already saved?
|
||||
sub alreadySaved($;$)
|
||||
{
|
||||
my $self = shift;
|
||||
|
||||
my $path = $self->getSavePath(@_);
|
||||
return -f $path || ( -d $path && -f "$path/index.html" );
|
||||
}
|
||||
|
||||
# Really save the page content
|
||||
sub save($;$)
|
||||
{
|
||||
my $self = shift;
|
||||
@ -724,3 +787,134 @@ sub save($;$)
|
||||
};
|
||||
print $@ if ($@);
|
||||
}
|
||||
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Dave Null - The netiquette's guardian angel
|
||||
|
||||
=head1 USAGE
|
||||
|
||||
./gen_site.pl [OPTIONS] [COMMANDS]
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
TODO
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over
|
||||
|
||||
=item B<-baseadmin=string>
|
||||
|
||||
Called SALT_ADMIN in PHP part. Default: C</admin/>
|
||||
|
||||
=item B<-baseerrors=string>
|
||||
|
||||
There is no equivalent in PHP part, it's harcoded. Default: C</errors/>
|
||||
|
||||
=item B<-basehome=string>
|
||||
|
||||
Called SALT_PUBLIC in PHP part. Default: C</>
|
||||
|
||||
=item B<-baseteams=string>
|
||||
|
||||
Called SALT_USER in PHP part. Default: C</connected/>
|
||||
|
||||
=item B<-deamon>
|
||||
|
||||
Run forever (until JOIN scheduler instruction occurs).
|
||||
|
||||
=item B<-help>
|
||||
|
||||
Displays the help.
|
||||
|
||||
=item B<-outdir=path>
|
||||
|
||||
Path where save generated pages
|
||||
|
||||
=item B<-socket=path>
|
||||
|
||||
Path to the socket to create.
|
||||
|
||||
=item B<-threads=int>
|
||||
|
||||
Number of parallel fetch to perform.
|
||||
|
||||
=back
|
||||
|
||||
=head1 SCHEDULER COMMANDS
|
||||
|
||||
=over
|
||||
|
||||
=item B<all>
|
||||
|
||||
Generate pages for all teams.
|
||||
|
||||
=item B<HOME>
|
||||
|
||||
Generate pages for the public part.
|
||||
|
||||
=item B<ERRORS>
|
||||
|
||||
Generate errors pages.
|
||||
|
||||
=item B<TEAM00>
|
||||
|
||||
Generate all pages for the team C<00>.
|
||||
|
||||
=item B<TEAM00,11>
|
||||
|
||||
Generate pages for the C<11> theme for the team C<00>.
|
||||
|
||||
=item B<reset>
|
||||
|
||||
|
||||
|
||||
=item B<DSYNC>
|
||||
|
||||
Made a full synchronization of the output directory (remove existing output dir
|
||||
and replace it by the current temporary content and reset temporary content).
|
||||
|
||||
=item B<SYNC>
|
||||
|
||||
Made a incremental synchronization with the output directory (just copy all
|
||||
files in the current temporary content to the output dir, don't remove
|
||||
anything).
|
||||
|
||||
=item B<LS>
|
||||
|
||||
Perform a C<ls> in the temporary directory content.
|
||||
|
||||
=item B<JOIN>
|
||||
|
||||
Flush the scheduler queue and
|
||||
|
||||
=item B<help>
|
||||
|
||||
Display some help.
|
||||
|
||||
=back
|
||||
|
||||
=head1 DEPENDENCIES
|
||||
|
||||
=over
|
||||
|
||||
=item
|
||||
|
||||
perl >= 5.10.1, compilated with threads support
|
||||
|
||||
=item
|
||||
|
||||
HTTP::Request::Common
|
||||
|
||||
=back
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Pierre-Olivier Mercier <nemunaire@epita.fr>
|
||||
|
||||
=head1 LICENSE AND COPYRIGHT
|
||||
|
||||
Copyright (c) 2014 Pierre-Olivier Mercier
|
||||
|
Loading…
Reference in New Issue
Block a user