#!/usr/bin/env python3
"""
S3 Static Page Generator for happyDomain

Generates static HTML index pages for browsing an S3 bucket.
"""

import os
import sys
import logging
import shutil
from datetime import datetime, timezone
from typing import List, Dict, Tuple, Optional
from pathlib import Path

import boto3
from botocore.exceptions import ClientError, NoCredentialsError
from jinja2 import Environment, FileSystemLoader, TemplateNotFound
from dateutil import parser as date_parser

# Configure logging
logging.basicConfig(
    level=os.getenv('LOG_LEVEL', 'INFO'),
    format='%(asctime)s [%(levelname)s] %(name)s: %(message)s'
)
logger = logging.getLogger('s3-generator')


class S3Client:
    """Client for interacting with S3-compatible storage."""

    def __init__(self, endpoint_url: str, region_name: str,
                 aws_access_key_id: str, aws_secret_access_key: str):
        """Initialize S3 client with custom endpoint.

        Args:
            endpoint_url: URL of the S3-compatible endpoint.
            region_name: Region name passed to boto3.
            aws_access_key_id: Access key used for authentication.
            aws_secret_access_key: Secret key used for authentication.

        Raises:
            Exception: Whatever ``boto3.client`` raises is logged and re-raised.
        """
        self.endpoint_url = endpoint_url
        self.region_name = region_name
        try:
            self.client = boto3.client(
                's3',
                endpoint_url=endpoint_url,
                region_name=region_name,
                aws_access_key_id=aws_access_key_id,
                aws_secret_access_key=aws_secret_access_key
            )
            logger.debug(f"S3 client initialized: {endpoint_url}")
        except Exception as e:
            logger.error(f"Failed to initialize S3 client: {e}")
            raise

    def validate_connection(self, bucket: str) -> bool:
        """Test S3 connectivity by attempting to access the bucket.

        Args:
            bucket: Name of the bucket to probe with ``head_bucket``.

        Returns:
            True when the bucket is reachable, False on any error (the error
            is logged, never raised).
        """
        try:
            self.client.head_bucket(Bucket=bucket)
            logger.info(f"Successfully connected to bucket: {bucket}")
            return True
        except ClientError as e:
            error_code = e.response.get('Error', {}).get('Code', 'Unknown')
            logger.error(f"Failed to access bucket {bucket}: {error_code}")
            return False
        except NoCredentialsError:
            logger.error("No AWS credentials found")
            return False
        except Exception as e:
            logger.error(f"Unexpected error validating connection: {e}")
            return False

    def list_all_objects(self, bucket: str) -> List[Dict]:
        """Fetch all objects from bucket with pagination support.

        Args:
            bucket: Name of the bucket to list.

        Returns:
            The concatenated ``Contents`` entries of every result page.

        Raises:
            ClientError: If the listing request fails (logged, re-raised).
            Exception: Any other error is logged and re-raised.
        """
        objects = []
        continuation_token = None

        try:
            while True:
                params = {'Bucket': bucket}
                if continuation_token:
                    params['ContinuationToken'] = continuation_token

                logger.debug(f"Fetching objects (token: {continuation_token})")
                response = self.client.list_objects_v2(**params)

                if 'Contents' in response:
                    objects.extend(response['Contents'])
                    logger.debug(f"Fetched {len(response['Contents'])} objects")

                if not response.get('IsTruncated', False):
                    break

                continuation_token = response.get('NextContinuationToken')
                if not continuation_token:
                    # Without a token the next request would restart from the
                    # first page and loop forever; stop with what we have.
                    logger.warning(
                        "Listing is truncated but no continuation token was "
                        "returned; stopping pagination"
                    )
                    break

            logger.info(f"Total objects fetched: {len(objects)}")
            return objects
        except ClientError as e:
            logger.error(f"Error listing objects: {e}")
            raise
        except Exception as e:
            logger.error(f"Unexpected error listing objects: {e}")
            raise


class DirectoryTree:
    """Build and manage directory tree structure from S3 objects."""

    def __init__(self):
        # Nested dicts keyed by directory name; leaves are empty dicts.
        self.tree = {}
        # Flat list of file records: path (parent dir), name, size, last_modified.
        self.files = []

    def build_tree(self, s3_objects: List[Dict]) -> None:
        """Parse S3 object keys into directory tree structure.

        Keys ending in ``/`` (directory markers) and files named
        ``index.html`` (previously generated pages) are skipped.

        Args:
            s3_objects: Object records; each must have a ``Key`` and may have
                ``Size`` and ``LastModified``.
        """
        logger.info("Building directory tree...")

        for obj in s3_objects:
            key = obj['Key']
            parts = key.split('/')

            # Skip keys ending with / (directory markers)
            if key.endswith('/'):
                logger.debug(f"Skipping directory marker: {key}")
                continue

            # Skip generated index.html files
            if parts[-1] == 'index.html':
                logger.debug(f"Skipping generated index file: {key}")
                continue

            # Build nested directory structure (no-op for files at root)
            dir_parts = parts[:-1]
            current = self.tree
            for part in dir_parts:
                current = current.setdefault(part, {})

            # Add file to its directory; root files get the path '/'
            self.files.append({
                'path': '/' + '/'.join(dir_parts),
                'name': parts[-1],
                'size': obj.get('Size', 0),
                # Timezone-aware fallback so it stays comparable with the
                # aware timestamps boto3 returns.
                'last_modified': obj.get('LastModified',
                                         datetime.now(timezone.utc)),
            })

        logger.info(f"Tree built with {len(self.files)} files")

    def get_all_paths(self) -> List[str]:
        """Return list of all unique directory paths.

        Returns:
            Directory paths, each starting and ending with ``/``; the root
            ``/`` is always first.
        """
        paths = ['/']

        def traverse(node: Dict, current_path: str):
            for dirname in node:
                # current_path always ends with '/', so a plain append never
                # produces a doubled separator.
                new_path = f"{current_path}{dirname}/"
                paths.append(new_path)
                traverse(node[dirname], new_path)

        traverse(self.tree, '/')
        return paths

    def get_directory_listing(self, path: str) -> Tuple[List[Dict], List[Dict]]:
        """Get subdirectories and files for a given path.

        Args:
            path: Directory path such as ``/`` or ``/a/b/``; trailing slashes
                are ignored.

        Returns:
            A ``(directories, files)`` tuple. Directories are dicts with
            ``name`` and ``last_modified`` (most recent file anywhere below
            them); files are dicts with ``name``, ``size``, ``last_modified``
            and ``url``. Both lists are sorted case-insensitively by name.
        """
        # Normalize path
        path = path.rstrip('/')
        if not path:
            path = '/'

        # Find subdirectories
        if path == '/':
            subdirs = list(self.tree.keys())
        else:
            current = self.tree
            for part in path.strip('/').split('/'):
                if part in current:
                    current = current[part]
                else:
                    current = {}
                    break
            subdirs = list(current.keys())

        # Find files in this directory
        dir_files = [f for f in self.files if f['path'] == path]

        # Calculate last modified for directories (most recent file)
        dir_metadata = []
        for dirname in sorted(subdirs, key=str.lower):
            dir_path = f"{path}/{dirname}" if path != '/' else f"/{dirname}"
            # Match on a component boundary so '/foo' does not pick up files
            # that live under '/foobar'.
            nested_prefix = dir_path + '/'
            mod_times = [
                f['last_modified'] for f in self.files
                if f['path'] == dir_path or f['path'].startswith(nested_prefix)
            ]
            last_mod = max(mod_times) if mod_times else datetime.now(timezone.utc)
            dir_metadata.append({
                'name': dirname,
                'last_modified': last_mod
            })

        # Format file metadata
        file_metadata = []
        for file in sorted(dir_files, key=lambda x: x['name'].lower()):
            file_url = f"{path}/{file['name']}" if path != '/' else f"/{file['name']}"
            file_metadata.append({
                'name': file['name'],
                'size': file['size'],
                'last_modified': file['last_modified'],
                'url': file_url
            })

        return dir_metadata, file_metadata


class HTMLGenerator:
    """Generate HTML pages from directory listings."""

    def __init__(self, template_path: str):
        """Initialize with Jinja2 template.

        Args:
            template_path: Path to the template file.

        Raises:
            TemplateNotFound: If the template file does not exist.
            Exception: Any other loading error is logged and re-raised.
        """
        try:
            # A bare filename has an empty dirname; look in the current dir.
            template_dir = os.path.dirname(template_path) or '.'
            template_name = os.path.basename(template_path)
            # Object names come from the bucket and are not trusted, so
            # escape them when they are rendered into HTML.
            env = Environment(
                loader=FileSystemLoader(template_dir),
                autoescape=True
            )
            self.template = env.get_template(template_name)
            logger.debug(f"Template loaded: {template_path}")
        except TemplateNotFound:
            logger.error(f"Template not found: {template_path}")
            raise
        except Exception as e:
            logger.error(f"Error loading template: {e}")
            raise

    @staticmethod
    def format_size(size_bytes: int) -> str:
        """Convert bytes to human-readable format.

        Values below 1024 are returned as a plain integer string; larger
        values get one decimal and a unit suffix (K, M, G, T, P).
        """
        if size_bytes == 0:
            return "0"

        units = ['', 'K', 'M', 'G', 'T', 'P']
        size = float(size_bytes)
        unit_index = 0

        while size >= 1024 and unit_index < len(units) - 1:
            size /= 1024
            unit_index += 1

        if unit_index == 0:
            return str(int(size))
        return f"{size:.1f}{units[unit_index]}"

    @staticmethod
    def format_date(dt: datetime) -> str:
        """Format datetime as 'Jan 6, 2026 16:34'."""
        # '%-d' is a platform-specific strftime extension; take the day
        # number from the attribute to get an unpadded day everywhere.
        return f"{dt.strftime('%b')} {dt.day}, {dt.strftime('%Y %H:%M')}"

    def generate_page(self, current_path: str, directories: List[Dict],
                      files: List[Dict]) -> str:
        """Render HTML page for a directory.

        Args:
            current_path: Directory path shown on the page.
            directories: Dicts with ``name`` and ``last_modified``.
            files: Dicts with ``name``, ``url``, ``size`` and ``last_modified``.

        Returns:
            The rendered HTML.

        Raises:
            Exception: Rendering errors are logged and re-raised.
        """
        # Determine if parent link should be shown
        show_parent = current_path != '/'

        # Format directories
        formatted_dirs = [
            {
                'name': d['name'],
                'last_modified': self.format_date(d['last_modified'])
            }
            for d in directories
        ]

        # Format files
        formatted_files = [
            {
                'name': f['name'],
                'url': f['url'],
                'last_modified': self.format_date(f['last_modified']),
                'size': self.format_size(f['size'])
            }
            for f in files
        ]

        # Render template
        try:
            return self.template.render(
                current_path=current_path,
                parent_link=show_parent,
                directories=formatted_dirs,
                files=formatted_files
            )
        except Exception as e:
            logger.error(f"Error rendering template for {current_path}: {e}")
            raise


def load_config() -> Dict[str, str]:
    """Load configuration from environment variables.

    Reads S3_ENDPOINT_URL, S3_BUCKET, S3_REGION (default 'us-east-1'),
    AWS_ACCESS_KEY_ID (or S3_ACCESS_KEY) and AWS_SECRET_ACCESS_KEY
    (or S3_SECRET_KEY). Exits the process with status 1 if a required value
    is missing.

    Returns:
        Dict with keys ``endpoint``, ``bucket``, ``region``, ``access_key``
        and ``secret_key``.
    """
    config = {
        'endpoint': os.getenv('S3_ENDPOINT_URL'),
        'bucket': os.getenv('S3_BUCKET'),
        'region': os.getenv('S3_REGION', 'us-east-1'),
        'access_key': os.getenv('AWS_ACCESS_KEY_ID') or os.getenv('S3_ACCESS_KEY'),
        'secret_key': os.getenv('AWS_SECRET_ACCESS_KEY') or os.getenv('S3_SECRET_KEY'),
    }

    # Validate required config
    required = ['endpoint', 'bucket', 'access_key', 'secret_key']
    missing = [k for k in required if not config.get(k)]

    if missing:
        logger.error(f"Missing required environment variables: {', '.join(missing)}")
        logger.error("Required: S3_ENDPOINT_URL, S3_BUCKET, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY")
        sys.exit(1)

    return config


def cleanup_output_dir(output_dir: str) -> None:
    """Clean output directory before generation.

    Removes ``output_dir`` and everything in it if it exists, then
    recreates it empty.
    """
    if os.path.exists(output_dir):
        logger.info(f"Cleaning output directory: {output_dir}")
        shutil.rmtree(output_dir)

    os.makedirs(output_dir, exist_ok=True)
    logger.debug(f"Output directory created: {output_dir}")


def main():
    """Main entry point for the S3 static page generator.

    Loads the configuration, lists the bucket, builds the directory tree and
    writes one ``index.html`` per directory under ``output/``. Exits with
    status 1 on any failure.
    """
    logger.info("=" * 60)
    logger.info("S3 Static Page Generator for happyDomain")
    logger.info("=" * 60)

    # 1. Load configuration
    config = load_config()
    logger.info("Configuration loaded:")
    logger.info(f"  Endpoint: {config['endpoint']}")
    logger.info(f"  Bucket: {config['bucket']}")
    logger.info(f"  Region: {config['region']}")

    # 2. Initialize S3 client
    try:
        s3_client = S3Client(
            endpoint_url=config['endpoint'],
            region_name=config['region'],
            aws_access_key_id=config['access_key'],
            aws_secret_access_key=config['secret_key']
        )
    except Exception as e:
        logger.error(f"Failed to initialize S3 client: {e}")
        sys.exit(1)

    # 3. Validate connection
    if not s3_client.validate_connection(config['bucket']):
        logger.error("Failed to connect to S3 bucket")
        sys.exit(1)

    # 4. List all objects
    logger.info("Fetching object list from S3...")
    try:
        objects = s3_client.list_all_objects(config['bucket'])
    except Exception as e:
        logger.error(f"Failed to list objects: {e}")
        sys.exit(1)

    logger.info(f"Found {len(objects)} objects in bucket")

    # 5. Build directory tree
    tree = DirectoryTree()
    tree.build_tree(objects)
    all_paths = tree.get_all_paths()
    logger.info(f"Identified {len(all_paths)} unique directories")

    # 6. Initialize HTML generator
    template_path = 'templates/index_template.html'
    try:
        html_gen = HTMLGenerator(template_path)
    except Exception as e:
        logger.error(f"Failed to initialize HTML generator: {e}")
        sys.exit(1)

    # 7. Clean output directory
    output_dir = 'output'
    cleanup_output_dir(output_dir)
    output_root = os.path.abspath(output_dir)

    # 8. Generate index.html for each directory
    logger.info("Generating HTML pages...")
    total = len(all_paths)
    for i, path in enumerate(sorted(all_paths), 1):
        logger.info(f"[{i}/{total}] Generating {path}index.html")

        try:
            if path == '/':
                output_path = os.path.join(output_dir, 'index.html')
            else:
                output_path = os.path.join(output_dir, path.strip('/'), 'index.html')

            # Object keys are not trusted: a '..' component must not let us
            # write outside the output directory.
            resolved = os.path.abspath(output_path)
            if os.path.commonpath([output_root, resolved]) != output_root:
                logger.warning(f"Skipping path outside output directory: {path}")
                continue

            dirs, files = tree.get_directory_listing(path)
            html_content = html_gen.generate_page(
                current_path=path,
                directories=dirs,
                files=files
            )

            # Write to output directory
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(html_content)

            logger.debug(f"Written: {output_path}")
        except Exception as e:
            logger.error(f"Error generating page for {path}: {e}")
            sys.exit(1)

    logger.info("=" * 60)
    logger.info("Generation complete!")
    logger.info(f"Output directory: {os.path.abspath('output')}")
    logger.info("=" * 60)


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        logger.info("Interrupted by user")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Unexpected error: {e}", exc_info=True)
        sys.exit(1)