#!/usr/bin/env perl use v5.10.1; use strict; use warnings; use threads; use threads::shared; use Getopt::Long; our $baseurl = "http://localhost/"; our $baseadmin = "admin/"; our $basehome = ""; our $baseteams = "connected/"; our $threads = 5; sub test { my $p = FicPage->new("http://localhost/connected/169/6"); say $p->getSavePath(); $p->fetch(); $p->toRightURL(); $p->treatLinks(); print $p->getNearLinks(); $p->save(); } sub genHome() { mirror($baseurl . $basehome); } sub genTeam($) { my $team_id = shift; my $p = FicPage->new($baseurl . $baseteams . $team_id); $p->fetch(); my @ths; for my $link ($p->getNearLinks()) { push @ths, threads->create(\&mirror, $link) if ($link =~ /\Q$baseteams\E/); } for my $th (@ths) { $th->join(); } } sub genFull() { my $p = FicPage->new($baseurl . $baseteams); $p->fetch(); my @ths; for my $link ($p->getNearLinks()) { push @ths, threads->create(\&mirror, $link) if ($link =~ /\Q$baseteams\E/); } for my $th (@ths) { $th->join(); } } sub parse($) { say shift; } sub mirror { my $p = FicPage->new($baseurl . $_[0]); return if ($p->alreadySaved()); $p->fetch(); my @links = $p->getNearLinks(); $p->treatLinks(); $p->save(); for my $link (@links) { mirror($link) if ($_[0] ne $link); } } # Parse arguments my $help; my $deamon; GetOptions ("threads|thread|t=i" => \$threads, "baseadmin|ba=s" => \$baseadmin, "basehome|bh=s" => \$basehome, "baseteams|bt=s" => \$baseteams, "deamon|d|?" => \$deamon, "help|h|?" => \$help); if ($deamon) { while(<>) { parse($_); } } elsif (@ARGV) { while (shift) { parse($_); } } else { my $th = threads->create(\&genHome); genFull(); $th->join(); } package FicPage; use v5.10.1; use strict; use warnings; use File::Basename; use File::Path qw(make_path); use HTTP::Request::Common qw(GET POST); use LWP::UserAgent; sub new { my $class = shift; my $self = { url => shift, }; bless $self, $class; return $self; } sub toRightURL($) { my $self = shift; my $search = $self->{url}; $search =~ s!/$!!; for my $url ($self->getNearLinks()) { $url = $baseurl . $url; if ($url =~ /^\Q$search\E/) { $url =~ s!^(\Q$search\E[^/]*).*$!$1/!; $url .= "/" if ($url !~ /\.[a-z0-9]{2,4}$/); $self->{url} = $url; return $url; } } return $self->{url}; } sub getLinks($) { my $self = shift; return $self->{content} =~ /(?:src|href)="([^"]+)"/g; } sub getNearLinks($) { my $self = shift; return $self->{content} =~ /(?:src|href)="(?:\Q$baseurl\E|(?:\/?\.\.)+|\/)([^"]+)"/g; } sub treatLinks($) { my $self = shift; $self->{content} =~ s!(src|href)="/( \Q$baseteams\E[^/]+/ | \Q$baseadmin\E | \Q$basehome\E)([^"]*)"!$1="/$3"!gx; } sub fetch($) { my $self = shift; my $ua = LWP::UserAgent->new; my $res = $ua->request(GET $self->{url}); $self->{content} = $res->content; } sub alreadySaved($;$) { my $self = shift; my $path = $self->getSavePath(@_); return -f $path || ( -d $path && -f "$path/index.html" ); } sub getSavePath($;$) { my $self = shift; my $basedir = shift // "outest"; # Convert URL to real directory path my $path = $self->{url}; $path =~ s/^\Q$main::baseurl\E//; return "$basedir/$path"; } sub save($;$) { my $self = shift; # Convert URL to real directory path my $path = $self->getSavePath(@_); eval { if ($path =~ /\.[a-z0-9]{2,4}$/) { eval { make_path( dirname("$path") ) if (! -d dirname("$path") ); }; open my $fd, ">", $path or die "$path: $!"; print $fd $self->{content}; close $fd; } else { eval { make_path "$path" if (! -d "$path"); }; open my $fd, ">", "$path/index.html" or die "$path: $!"; print $fd $self->{content}; close $fd; } }; print $@ if ($@); }