From 4fe56db0d62ef2aadd6b8fc195497054fc5c3e0d Mon Sep 17 00:00:00 2001 From: nemunaire Date: Thu, 20 Nov 2014 15:39:54 +0100 Subject: [PATCH] Document gen_site.pl --- gen_site.pl | 258 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 226 insertions(+), 32 deletions(-) diff --git a/gen_site.pl b/gen_site.pl index 5f980509..17928f21 100755 --- a/gen_site.pl +++ b/gen_site.pl @@ -1,4 +1,14 @@ #!/usr/bin/env perl +#============================================================================= +# +# USAGE: ./gen_site.pl [options] [commands] +# +# DESCRIPTION: More efficient wget -m +# +# AUTHOR: Pierre-Olivier Mercier +# ORGANIZATION: EPITA SRS +# +#============================================================================= use v5.10.1; use strict; @@ -16,6 +26,8 @@ use Getopt::Long; use IO::Socket; use Thread::Queue; +### GLOBALS ########################################################### + our $outdir = "outest"; our $outteams = "/teams/"; our $outerrors = "/errors/"; @@ -29,6 +41,16 @@ our $baseerrors = "/errors/"; our $baseteams = "/connected/"; our $threads = 6; +my $deamon; +my $socket; + +my $queue :shared = Thread::Queue->new(); +my $main_thread; + + +### GENERATORS ######################################################## + +# Enqueue error pages in the mirror sub genErrors(;$) { my $m = shift // Mirror->new(); @@ -43,6 +65,7 @@ sub genErrors(;$) return $m; } +# Enqueue public pages in the mirror sub genHome(;$) { my $m = shift // Mirror->new(); @@ -54,6 +77,7 @@ sub genHome(;$) return $m; } +# Enqueue all teams in the mirror sub genFull(;$) { my $m = shift // Mirror->new(); @@ -62,6 +86,7 @@ sub genFull(;$) return $m; } +# Enqueue a team in the mirror sub genTeam($;$) { my $team_id = shift; @@ -72,6 +97,7 @@ sub genTeam($;$) return $m; } +# Enqueue theme pages for a given team in the mirror sub genTeamTheme($$;$) { my $team_id = shift; @@ -84,9 +110,9 @@ sub genTeamTheme($$;$) } -my $queue :shared = Thread::Queue->new(); -my $main_thread; +### TOOLS ############################################################# +# Manage the mirror sub manage { my $m = shift // Mirror->new(); @@ -187,6 +213,7 @@ sub manage } } +# Perform a synchronization of the temporary mirrored directory sub sync { if (shift) @@ -261,6 +288,7 @@ sub sync } } +# Parse input command and enqueue them sub parse($$;$) { my $m = shift; @@ -298,19 +326,9 @@ sub parse($$;$) } -# Parse arguments -my $help; my $deamon; my $socket; -GetOptions ("threads|thread|t=i" => \$threads, - "baseadmin|ba=s" => \$baseadmin, - "basehome|bh=s" => \$basehome, - "baseteams|bt=s" => \$baseteams, - "outdir|out|o=s" => \$outdir, - "deamon|d" => \$deamon, - "socket|s=s" => \$socket, - "help|h|?" => \$help); - -$outdir = abs_path($outdir); +### SOCKETS ########################################################### +# Create the socket and wait for connection sub create_socket { my $m = shift; @@ -332,6 +350,7 @@ sub create_socket } } +# Manage the socket connection sub socket_run { my $m = shift; @@ -348,6 +367,24 @@ sub socket_run close $connection; } + +### MAIN ################################################################ + +# Parse arguments +my $help; +GetOptions ("threads|thread|t=i" => \$threads, + "baseadmin|ba=s" => \$baseadmin, + "basehome|bh=s" => \$basehome, + "baseerrors|be=s" => \$baseerrors, + "baseteams|bt=s" => \$baseteams, + "outdir|out|o=s" => \$outdir, + "deamon|d" => \$deamon, + "socket|s=s" => \$socket, + "help|h|?" => \$help); + +$outdir = abs_path($outdir); + +# Daemon mode: run forever until stdin is open if ($deamon) { my $m :shared = Mirror->new(); @@ -365,6 +402,8 @@ if ($deamon) parse($m, "J"); $main_thread->join(); } + +# Run given commands elsif (@ARGV) { my $m :shared = Mirror->new(); @@ -380,6 +419,8 @@ elsif (@ARGV) parse($m, "J"); $main_thread->join(); } + +# Just performs a full generation else { my $m = genFull(); @@ -390,11 +431,14 @@ else sync(1); } +# Clean tmp remove_tree($main::tmpdir); package Mirror; +# Structure to store a mirror state of a website + use v5.10.1; use strict; use warnings; @@ -403,6 +447,7 @@ use threads::shared; use Thread::Queue; +# Initialize the class sub new($) { my $class = shift; @@ -418,6 +463,7 @@ sub new($) return $self; } +# Launch threads and do more initialization sub start($) { my $self = shift; @@ -434,6 +480,7 @@ sub start($) } } +# Enqueue a RESET sub reset($) { my $self = shift; @@ -441,6 +488,7 @@ sub reset($) $self->{add}->enqueue("#RESET"); } +# Clean queues and wait for stop threads sub end($) { my $self = shift; @@ -470,6 +518,7 @@ sub stop($) return $self->end(); } +# Enqueue URLs sub add_url($@) { my $self = shift; @@ -480,6 +529,7 @@ sub add_url($@) } } +# Function executed to prepare URL to fetch sub run_add { my $self = shift; @@ -520,6 +570,7 @@ sub run_add } } +# Main function to fetch pages sub run { my $self = shift; @@ -563,6 +614,7 @@ sub run } } +# Join sub join($) { my $self = shift; @@ -582,6 +634,8 @@ sub join($) package FicPage; +# Represent a web page in order to perform some treatment on it + use v5.10.1; use strict; use warnings; @@ -591,6 +645,7 @@ use File::Path qw(make_path); use HTTP::Request::Common qw(GET POST); use LWP::UserAgent; +# Initialize the class sub new { my $class = shift; @@ -603,6 +658,18 @@ sub new return $self; } +# Fetch the page content +sub fetch($) +{ + my $self = shift; + my $ua = LWP::UserAgent->new; + + my $res = $ua->request(GET $self->{url}); + + $self->{content} = $res->content; +} + +# If the URL store is a short one, try to expand it by looking to correct URL sub toRightURL($) { my $self = shift; @@ -625,6 +692,7 @@ sub toRightURL($) return $self->{url}; } +# Retrieve all links in the page content sub getLinks($) { my $self = shift; @@ -632,6 +700,7 @@ sub getLinks($) return $self->{content} =~ /(?:src|href|action)="([^"]+)"/g; } +# Search relative (and absolute linking to the site part) links sub getNearLinks($) { my $self = shift; @@ -655,6 +724,7 @@ sub getNearLinks($) } } +# Remove SALT base from URL contained in the page sub treatLinks($) { my $self = shift; @@ -662,24 +732,7 @@ sub treatLinks($) $self->{content} =~ s!(src|href|action)="( \Q$baseteams\E[^/]+/ | \Q$baseadmin\E | \Q$basehome\E)([^"]*)"!$1="/$3"!gx; } -sub fetch($) -{ - my $self = shift; - my $ua = LWP::UserAgent->new; - - my $res = $ua->request(GET $self->{url}); - - $self->{content} = $res->content; -} - -sub alreadySaved($;$) -{ - my $self = shift; - - my $path = $self->getSavePath(@_); - return -f $path || ( -d $path && -f "$path/index.html" ); -} - +# Generate the path where saved the page content sub getSavePath($;$) { my $self = shift; @@ -692,6 +745,16 @@ sub getSavePath($;$) return "$basedir$path"; } +# Is the page already saved? +sub alreadySaved($;$) +{ + my $self = shift; + + my $path = $self->getSavePath(@_); + return -f $path || ( -d $path && -f "$path/index.html" ); +} + +# Really save the page content sub save($;$) { my $self = shift; @@ -724,3 +787,134 @@ sub save($;$) }; print $@ if ($@); } + +__END__ + +=head1 NAME + +Dave Null - The netiquette's guardian angel + +=head1 USAGE + + ./gen_site.pl [OPTIONS] [COMMANDS] + +=head1 DESCRIPTION + +TODO + +=head1 OPTIONS + +=over + +=item B<-baseadmin=string> + +Called SALT_ADMIN in PHP part. Default: C + +=item B<-baseerrors=string> + +There is no equivalent in PHP part, it's harcoded. Default: C + +=item B<-basehome=string> + +Called SALT_PUBLIC in PHP part. Default: C + +=item B<-baseteams=string> + +Called SALT_USER in PHP part. Default: C + +=item B<-deamon> + +Run forever (until JOIN scheduler instruction occurs). + +=item B<-help> + +Displays the help. + +=item B<-outdir=path> + +Path where save generated pages + +=item B<-socket=path> + +Path to the socket to create. + +=item B<-threads=int> + +Number of parallel fetch to perform. + +=back + +=head1 SCHEDULER COMMANDS + +=over + +=item B + +Generate pages for all teams. + +=item B + +Generate pages for the public part. + +=item B + +Generate errors pages. + +=item B + +Generate all pages for the team C<00>. + +=item B + +Generate pages for the C<11> theme for the team C<00>. + +=item B + + + +=item B + +Made a full synchronization of the output directory (remove existing output dir +and replace it by the current temporary content and reset temporary content). + +=item B + +Made a incremental synchronization with the output directory (just copy all +files in the current temporary content to the output dir, don't remove +anything). + +=item B + +Perform a C in the temporary directory content. + +=item B + +Flush the scheduler queue and + +=item B + +Display some help. + +=back + +=head1 DEPENDENCIES + +=over + +=item + +perl >= 5.10.1, compilated with threads support + +=item + +HTTP::Request::Common + +=back + +=head1 AUTHOR + +Pierre-Olivier Mercier + +=head1 LICENSE AND COPYRIGHT + +Copyright (c) 2014 Pierre-Olivier Mercier