Document gen_site.pl

This commit is contained in:
nemunaire 2014-11-20 15:39:54 +01:00
parent ec69cea4f6
commit 4fe56db0d6

View File

@ -1,4 +1,14 @@
#!/usr/bin/env perl
#=============================================================================
#
# USAGE: ./gen_site.pl [options] [commands]
#
# DESCRIPTION: More efficient wget -m
#
# AUTHOR: Pierre-Olivier Mercier <nemunaire@epita.fr>
# ORGANIZATION: EPITA SRS
#
#=============================================================================
use v5.10.1;
use strict;
@ -16,6 +26,8 @@ use Getopt::Long;
use IO::Socket;
use Thread::Queue;
### GLOBALS ###########################################################
our $outdir = "outest";
our $outteams = "/teams/";
our $outerrors = "/errors/";
@ -29,6 +41,16 @@ our $baseerrors = "/errors/";
our $baseteams = "/connected/";
our $threads = 6;
my $deamon;
my $socket;
my $queue :shared = Thread::Queue->new();
my $main_thread;
### GENERATORS ########################################################
# Enqueue error pages in the mirror
sub genErrors(;$)
{
my $m = shift // Mirror->new();
@ -43,6 +65,7 @@ sub genErrors(;$)
return $m;
}
# Enqueue public pages in the mirror
sub genHome(;$)
{
my $m = shift // Mirror->new();
@ -54,6 +77,7 @@ sub genHome(;$)
return $m;
}
# Enqueue all teams in the mirror
sub genFull(;$)
{
my $m = shift // Mirror->new();
@ -62,6 +86,7 @@ sub genFull(;$)
return $m;
}
# Enqueue a team in the mirror
sub genTeam($;$)
{
my $team_id = shift;
@ -72,6 +97,7 @@ sub genTeam($;$)
return $m;
}
# Enqueue theme pages for a given team in the mirror
sub genTeamTheme($$;$)
{
my $team_id = shift;
@ -84,9 +110,9 @@ sub genTeamTheme($$;$)
}
my $queue :shared = Thread::Queue->new();
my $main_thread;
### TOOLS #############################################################
# Manage the mirror
sub manage
{
my $m = shift // Mirror->new();
@ -187,6 +213,7 @@ sub manage
}
}
# Perform a synchronization of the temporary mirrored directory
sub sync
{
if (shift)
@ -261,6 +288,7 @@ sub sync
}
}
# Parse input command and enqueue them
sub parse($$;$)
{
my $m = shift;
@ -298,19 +326,9 @@ sub parse($$;$)
}
# Parse arguments
my $help; my $deamon; my $socket;
GetOptions ("threads|thread|t=i" => \$threads,
"baseadmin|ba=s" => \$baseadmin,
"basehome|bh=s" => \$basehome,
"baseteams|bt=s" => \$baseteams,
"outdir|out|o=s" => \$outdir,
"deamon|d" => \$deamon,
"socket|s=s" => \$socket,
"help|h|?" => \$help);
$outdir = abs_path($outdir);
### SOCKETS ###########################################################
# Create the socket and wait for connection
sub create_socket
{
my $m = shift;
@ -332,6 +350,7 @@ sub create_socket
}
}
# Manage the socket connection
sub socket_run
{
my $m = shift;
@ -348,6 +367,24 @@ sub socket_run
close $connection;
}
### MAIN ################################################################
# Parse arguments
my $help;
GetOptions ("threads|thread|t=i" => \$threads,
"baseadmin|ba=s" => \$baseadmin,
"basehome|bh=s" => \$basehome,
"baseerrors|be=s" => \$baseerrors,
"baseteams|bt=s" => \$baseteams,
"outdir|out|o=s" => \$outdir,
"deamon|d" => \$deamon,
"socket|s=s" => \$socket,
"help|h|?" => \$help);
$outdir = abs_path($outdir);
# Daemon mode: run forever until stdin is open
if ($deamon)
{
my $m :shared = Mirror->new();
@ -365,6 +402,8 @@ if ($deamon)
parse($m, "J");
$main_thread->join();
}
# Run given commands
elsif (@ARGV)
{
my $m :shared = Mirror->new();
@ -380,6 +419,8 @@ elsif (@ARGV)
parse($m, "J");
$main_thread->join();
}
# Just performs a full generation
else
{
my $m = genFull();
@ -390,11 +431,14 @@ else
sync(1);
}
# Clean tmp
remove_tree($main::tmpdir);
package Mirror;
# Structure to store a mirror state of a website
use v5.10.1;
use strict;
use warnings;
@ -403,6 +447,7 @@ use threads::shared;
use Thread::Queue;
# Initialize the class
sub new($)
{
my $class = shift;
@ -418,6 +463,7 @@ sub new($)
return $self;
}
# Launch threads and do more initialization
sub start($)
{
my $self = shift;
@ -434,6 +480,7 @@ sub start($)
}
}
# Enqueue a RESET
sub reset($)
{
my $self = shift;
@ -441,6 +488,7 @@ sub reset($)
$self->{add}->enqueue("#RESET");
}
# Clean queues and wait for stop threads
sub end($)
{
my $self = shift;
@ -470,6 +518,7 @@ sub stop($)
return $self->end();
}
# Enqueue URLs
sub add_url($@)
{
my $self = shift;
@ -480,6 +529,7 @@ sub add_url($@)
}
}
# Function executed to prepare URL to fetch
sub run_add
{
my $self = shift;
@ -520,6 +570,7 @@ sub run_add
}
}
# Main function to fetch pages
sub run
{
my $self = shift;
@ -563,6 +614,7 @@ sub run
}
}
# Join
sub join($)
{
my $self = shift;
@ -582,6 +634,8 @@ sub join($)
package FicPage;
# Represent a web page in order to perform some treatment on it
use v5.10.1;
use strict;
use warnings;
@ -591,6 +645,7 @@ use File::Path qw(make_path);
use HTTP::Request::Common qw(GET POST);
use LWP::UserAgent;
# Initialize the class
sub new
{
my $class = shift;
@ -603,6 +658,18 @@ sub new
return $self;
}
# Fetch the page content
sub fetch($)
{
my $self = shift;
my $ua = LWP::UserAgent->new;
my $res = $ua->request(GET $self->{url});
$self->{content} = $res->content;
}
# If the URL store is a short one, try to expand it by looking to correct URL
sub toRightURL($)
{
my $self = shift;
@ -625,6 +692,7 @@ sub toRightURL($)
return $self->{url};
}
# Retrieve all links in the page content
sub getLinks($)
{
my $self = shift;
@ -632,6 +700,7 @@ sub getLinks($)
return $self->{content} =~ /(?:src|href|action)="([^"]+)"/g;
}
# Search relative (and absolute linking to the site part) links
sub getNearLinks($)
{
my $self = shift;
@ -655,6 +724,7 @@ sub getNearLinks($)
}
}
# Remove SALT base from URL contained in the page
sub treatLinks($)
{
my $self = shift;
@ -662,24 +732,7 @@ sub treatLinks($)
$self->{content} =~ s!(src|href|action)="( \Q$baseteams\E[^/]+/ | \Q$baseadmin\E | \Q$basehome\E)([^"]*)"!$1="/$3"!gx;
}
sub fetch($)
{
my $self = shift;
my $ua = LWP::UserAgent->new;
my $res = $ua->request(GET $self->{url});
$self->{content} = $res->content;
}
sub alreadySaved($;$)
{
my $self = shift;
my $path = $self->getSavePath(@_);
return -f $path || ( -d $path && -f "$path/index.html" );
}
# Generate the path where saved the page content
sub getSavePath($;$)
{
my $self = shift;
@ -692,6 +745,16 @@ sub getSavePath($;$)
return "$basedir$path";
}
# Is the page already saved?
sub alreadySaved($;$)
{
my $self = shift;
my $path = $self->getSavePath(@_);
return -f $path || ( -d $path && -f "$path/index.html" );
}
# Really save the page content
sub save($;$)
{
my $self = shift;
@ -724,3 +787,134 @@ sub save($;$)
};
print $@ if ($@);
}
__END__
=head1 NAME
Dave Null - The netiquette's guardian angel
=head1 USAGE
./gen_site.pl [OPTIONS] [COMMANDS]
=head1 DESCRIPTION
TODO
=head1 OPTIONS
=over
=item B<-baseadmin=string>
Called SALT_ADMIN in PHP part. Default: C</admin/>
=item B<-baseerrors=string>
There is no equivalent in PHP part, it's harcoded. Default: C</errors/>
=item B<-basehome=string>
Called SALT_PUBLIC in PHP part. Default: C</>
=item B<-baseteams=string>
Called SALT_USER in PHP part. Default: C</connected/>
=item B<-deamon>
Run forever (until JOIN scheduler instruction occurs).
=item B<-help>
Displays the help.
=item B<-outdir=path>
Path where save generated pages
=item B<-socket=path>
Path to the socket to create.
=item B<-threads=int>
Number of parallel fetch to perform.
=back
=head1 SCHEDULER COMMANDS
=over
=item B<all>
Generate pages for all teams.
=item B<HOME>
Generate pages for the public part.
=item B<ERRORS>
Generate errors pages.
=item B<TEAM00>
Generate all pages for the team C<00>.
=item B<TEAM00,11>
Generate pages for the C<11> theme for the team C<00>.
=item B<reset>
=item B<DSYNC>
Made a full synchronization of the output directory (remove existing output dir
and replace it by the current temporary content and reset temporary content).
=item B<SYNC>
Made a incremental synchronization with the output directory (just copy all
files in the current temporary content to the output dir, don't remove
anything).
=item B<LS>
Perform a C<ls> in the temporary directory content.
=item B<JOIN>
Flush the scheduler queue and
=item B<help>
Display some help.
=back
=head1 DEPENDENCIES
=over
=item
perl >= 5.10.1, compilated with threads support
=item
HTTP::Request::Common
=back
=head1 AUTHOR
Pierre-Olivier Mercier <nemunaire@epita.fr>
=head1 LICENSE AND COPYRIGHT
Copyright (c) 2014 Pierre-Olivier Mercier