From d6d3fe4d7f25fe6b2de5e5c3587816fb11141ee8 Mon Sep 17 00:00:00 2001
From: Pat Teruel
Date: Tue, 30 Dec 2025 21:18:32 +0800
Subject: [PATCH] feat: Add Docker containerization with Gitea Actions

---
 .dockerignore                   |  34 +++++++++
 .gitea/workflows/build.yml      |  70 +++++++++++++++++++
 .github/copilot-instructions.md | 119 ++++++++++++++++++++++++++++++++
 Dockerfile                      |  39 +++++++++++
 nginx.conf                      | 115 ++++++++++++++++++++++++++++++
 5 files changed, 377 insertions(+)
 create mode 100644 .dockerignore
 create mode 100644 .gitea/workflows/build.yml
 create mode 100644 .github/copilot-instructions.md
 create mode 100644 Dockerfile
 create mode 100644 nginx.conf

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..4fa21de
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,34 @@
+# Git
+.git/
+.gitignore
+.github/
+
+# HTTrack artifacts (not part of the actual site)
+# Patterns are relative to the build-context root, so the root index.html
+# listed here does not exclude karaoke.karaniwan.org/index.html
+hts-cache/
+hts-log.txt
+cookies.txt
+index.html
+backblue.gif
+fade.gif
+
+# Documentation
+README.md
+*.md
+
+# Docker
+Dockerfile
+.dockerignore
+docker-compose.yml
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS
+.DS_Store
+Thumbs.db
diff --git a/.gitea/workflows/build.yml b/.gitea/workflows/build.yml
new file mode 100644
index 0000000..1d17952
--- /dev/null
+++ b/.gitea/workflows/build.yml
@@ -0,0 +1,70 @@
+name: Build and Push Docker Image
+
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+env:
+  IMAGE_NAME: karaokepedia
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_PASSWORD }}
+
+      # Uncomment if using GitHub Container Registry
+      # - name: Log in to GitHub Container Registry
+      #   uses: docker/login-action@v3
+      #   with:
+      #     registry: ghcr.io
+      #     username: ${{ github.actor }}
+      #     password: ${{ secrets.GITHUB_TOKEN }}
+
+      # Uncomment if using Gitea Container Registry
+      # - name: Log in to Gitea Container Registry
+      #   uses: docker/login-action@v3
+      #   with:
+      #     registry: gitea.yourdomain.com
+      #     username: ${{ secrets.GITEA_USERNAME }}
+      #     password: ${{ secrets.GITEA_PASSWORD }}
+
+      - name: Extract metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ secrets.DOCKER_USERNAME }}/${{ env.IMAGE_NAME }}
+          # Use this for GHCR:
+          # images: ghcr.io/${{ github.repository_owner }}/${{ env.IMAGE_NAME }}
+          # Use this for Gitea:
+          # images: gitea.yourdomain.com/${{ github.repository_owner }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=raw,value=latest
+            type=sha,prefix={{branch}}-
+
+      - name: Build and push Docker image
+        id: build
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+      # The digest is an output of build-push-action, not of metadata-action
+      - name: Image digest
+        run: echo "${{ steps.build.outputs.digest }}"
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
new file mode 100644
index 0000000..66f03b5
--- /dev/null
+++ b/.github/copilot-instructions.md
@@ -0,0 +1,119 @@
+# Karaokepedia - Static Website Archive
+
+## Project Overview
+This is a **static HTML archive** of Karaokepedia (karaoke.karaniwan.org), a semi-crowdsourced karaoke database focused on Japanese/anime songs available in Philippine karaoke machines. The site was deprecated and migrated to awitotaku.com, but this archive preserves the original content for reference.
+
+**Status**: HTTrack mirror from December 29, 2025. Dockerized with nginx:alpine, automated builds via Gitea Actions.
+
+## Architecture
+
+### Static Content
+- **Pure Static HTML**: All files in `karaoke.karaniwan.org/` are pre-rendered HTML pages (~17.6 MB, 2,309 files)
+- **No backend**: HTTrack mirror of Ruby on Rails app (visible in HTML comments), but this is static HTML only
+- **Assets included**: 2 MB of CSS/JS/fonts/images stay in-repo (self-contained archive)
+
+### Container Stack
+- **Base image**: nginx:alpine (~25 MB)
+- **Web server**: Custom nginx.conf with font MIME types, gzip, caching
+- **Security**: Runs as non-root user, includes healthcheck
+- **Final size**: ~35-40 MB total
+- **CI/CD**: Gitea Actions builds on push to main
+
+## Structure
+```
+.
+├── karaoke.karaniwan.org/          # Static HTML content (served by nginx)
+│   ├── songs/                      # Individual song pages
+│   ├── songs*.html                 # Paginated listings (hex-named)
+│   ├── artists/                    # Artist profiles
+│   ├── karaoke_machines/           # Machine-specific listings
+│   ├── tags/                       # Tag-based grouping
+│   ├── assets/                     # CSS/JS/images/fonts (~2 MB)
+│   └── index.html                  # Main entry point
+├── Dockerfile                      # nginx:alpine with custom config
+├── nginx.conf                      # Custom nginx configuration
+├── .dockerignore                   # Excludes HTTrack artifacts
+├── .gitea/workflows/build.yml      # CI/CD pipeline
+├── hts-cache/                      # HTTrack metadata (not in image)
+├── hts-log.txt                     # HTTrack log (not in image)
+└── index.html                      # HTTrack root nav (not in image)
+```
+
+## Data Model (Static)
+Each song page includes:
+- **Song title** and artist (linked)
+- **Machine keys**: KY (Kumyoung), TJ (TJ Media), P (Platinum) with numeric codes
+- **Tags**: Language (Japanese/English/Korean/OPM), genre (Pop/Rock/Metal), type (Anime OST/Drama OST)
+- **Release dates** and alternative names
+- Bootstrap 3 responsive layout
+
+**File naming**: Hex-based (e.g., `songs9285.html`, `songs02d1.html`) for pagination/organization.
+
+## Development & Deployment
+
+### Local Testing
+```bash
+# Quick test with Python
+python3 -m http.server 8000
+# Visit http://localhost:8000/karaoke.karaniwan.org/
+
+# Docker build and run
+docker build -t karaokepedia:test .
+docker run -p 8080:80 karaokepedia:test
+# Visit http://localhost:8080/
+
+# Check healthcheck
+docker inspect --format='{{.State.Health.Status}}' CONTAINER_NAME
+```
+
+### Building for Production
+```bash
+# Build with tags
+docker build -t karaokepedia:latest .
+docker tag karaokepedia:latest your-registry/karaokepedia:latest
+
+# Push to registry
+docker push your-registry/karaokepedia:latest
+```
+
+### CI/CD Pipeline (Gitea Actions)
+- **Trigger**: Push to `main` branch or manual dispatch
+- **Workflow**: `.gitea/workflows/build.yml` (GitHub Actions-compatible syntax)
+- **Steps**: Checkout → Setup Buildx → Login to registry → Build & push → Output digest
+- **Tags**: `:latest` and `:main-SHORT_SHA` (branch name plus short commit SHA)
+- **Registry**: Configure via secrets (DOCKER_USERNAME/DOCKER_PASSWORD for Docker Hub, or adapt for Gitea registry)
+
+### Registry Configuration
+Edit `.gitea/workflows/build.yml` and uncomment the appropriate registry:
+- **Docker Hub** (default): Uses `DOCKER_USERNAME` and `DOCKER_PASSWORD` secrets
+- **GitHub Container Registry**: Uncomment GHCR section, uses `GITHUB_TOKEN`
+- **Gitea Container Registry**: Uncomment Gitea section, configure domain and credentials
+
+## Modifying Content
+Since this is static HTML:
+1. **Edit HTML directly** - no templating system
+2. **Manual updates** across paginated files (songs*.html) if needed
+3. **No regeneration tool** - changes must be applied per-file
+
+## Navigation Conventions
+- **Internal links** are relative (e.g., `../songs/`, `../../artists/`)
+- **Asset references** use `assets/application-*.css` and `assets/application-*.js` (fingerprinted)
+- **External services**: Disqus comments (disabled), Piwik analytics (historic)
+
+## Key Patterns
+- **Machine abbreviations**: KY=Kumyoung (red label), TJ=TJ Media (orange label), P=Platinum (no color)
+- **Alphabetical pagination**: `songs.html?initial=A`, `songs6c50.html?initial=A`
+- **Deprecated notices**: All pages show "This site has been deprecated. Proceed to Awit Otaku"
+
+## HTTrack Artifacts
+- `hts-cache/`: Mirror metadata (new.lst lists all mirrored URLs)
+- `hts-log.txt`: Download log (2312 links, 2309 files, 2 404 errors)
+- `cookies.txt`: Session cookies from mirroring
+- Root `index.html`: HTTrack navigation page
+
+## What NOT to Do
+- Don't look for package.json, Gemfile, or build configs - they don't exist
+- Don't try to "npm install" or "bundle install" - this isn't a dev project
+- Don't modify HTTrack files (hts-cache/, hts-log.txt) - they're archival metadata
+- Don't expect dynamic search/filtering - all functionality is static HTML links
+- Don't include HTTrack artifacts in Docker image - they're excluded via .dockerignore
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..1c17ae7
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,39 @@
+# Single-stage image: stock nginx:alpine plus the static site content
+FROM nginx:alpine
+
+# Install dumb-init for proper signal handling
+RUN apk add --no-cache dumb-init
+
+# Copy custom nginx configuration
+COPY nginx.conf /etc/nginx/nginx.conf
+
+# Copy static site content to nginx html directory, owned by the nginx user.
+# Using COPY --chown avoids a recursive chown in a later layer, which would
+# store a second copy of every site file in the image.
+COPY --chown=nginx:nginx karaoke.karaniwan.org/ /usr/share/nginx/html/
+
+# Give the nginx user (already provided by the base image) ownership of the
+# paths nginx writes to at runtime
+RUN chown -R nginx:nginx /var/cache/nginx && \
+    chown -R nginx:nginx /var/log/nginx && \
+    touch /var/run/nginx.pid && \
+    chown nginx:nginx /var/run/nginx.pid
+
+# Switch to non-root user
+# NOTE(review): binding port 80 without root depends on the container runtime
+# allowing unprivileged low ports - confirm for the target platform
+USER nginx
+
+# Expose port 80
+EXPOSE 80
+
+# Add healthcheck. Probe 127.0.0.1 instead of localhost: nginx.conf listens on
+# IPv4 only, and localhost can resolve to ::1 first inside the container
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD wget --no-verbose --tries=1 --spider http://127.0.0.1/ || exit 1
+
+# Use dumb-init to handle signals properly
+ENTRYPOINT ["/usr/bin/dumb-init", "--"]
+
+# Start nginx
+CMD ["nginx", "-g", "daemon off;"]
diff --git a/nginx.conf b/nginx.conf
new file mode 100644
index 0000000..3e7a1ce
--- /dev/null
+++ b/nginx.conf
@@ -0,0 +1,115 @@
+# Only takes effect when the master process starts as root. The Dockerfile
+# runs nginx as the unprivileged "nginx" user, where this directive is ignored.
+user nginx;
+worker_processes auto;
+error_log /var/log/nginx/error.log warn;
+pid /var/run/nginx.pid;
+
+events {
+    worker_connections 1024;
+}
+
+http {
+    include /etc/nginx/mime.types;
+    default_type application/octet-stream;
+
+    # Logging
+    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
+                    '$status $body_bytes_sent "$http_referer" '
+                    '"$http_user_agent" "$http_x_forwarded_for"';
+    access_log /var/log/nginx/access.log main;
+
+    # Performance
+    sendfile on;
+    tcp_nopush on;
+    tcp_nodelay on;
+    keepalive_timeout 65;
+    types_hash_max_size 2048;
+
+    # Gzip compression
+    gzip on;
+    gzip_vary on;
+    gzip_proxied any;
+    gzip_comp_level 6;
+    gzip_types text/plain text/css text/xml text/javascript
+               application/json application/javascript application/xml+rss
+               application/x-javascript application/xhtml+xml
+               application/font-woff application/font-woff2
+               font/woff font/woff2 font/ttf font/eot
+               image/svg+xml;
+
+    # Additional MIME types for fonts.
+    # Each extension appears once in this block: nginx keeps only the last
+    # mapping for an extension and logs a warning for every duplicate.
+    types {
+        font/woff woff;
+        font/woff2 woff2;
+        application/vnd.ms-fontobject eot;
+        font/ttf ttf;
+        font/otf otf;
+    }
+
+    server {
+        listen 80;
+        server_name _;
+        root /usr/share/nginx/html;
+        index index.html;
+
+        # Security headers.
+        # nginx inherits add_header from the enclosing level only when the
+        # current level defines none of its own, so every location below that
+        # adds Cache-Control has to repeat these three lines.
+        add_header X-Frame-Options "SAMEORIGIN" always;
+        add_header X-Content-Type-Options "nosniff" always;
+        add_header X-XSS-Protection "1; mode=block" always;
+
+        # Main location
+        location / {
+            try_files $uri $uri/ =404;
+        }
+
+        # Deny access to hidden files.
+        # Regex locations are tried in order of appearance, so this must come
+        # before the caching rules or /.foo/bar.css would be served by them.
+        location ~ /\. {
+            deny all;
+            access_log off;
+            log_not_found off;
+        }
+
+        # Cache static assets
+        location ~* ^/assets/ {
+            expires 1y;
+            add_header Cache-Control "public, immutable";
+            add_header X-Frame-Options "SAMEORIGIN" always;
+            add_header X-Content-Type-Options "nosniff" always;
+            add_header X-XSS-Protection "1; mode=block" always;
+            access_log off;
+        }
+
+        # Cache images and fonts
+        location ~* \.(jpg|jpeg|png|gif|ico|svg|woff|woff2|ttf|eot)$ {
+            expires 1y;
+            add_header Cache-Control "public, immutable";
+            add_header X-Frame-Options "SAMEORIGIN" always;
+            add_header X-Content-Type-Options "nosniff" always;
+            add_header X-XSS-Protection "1; mode=block" always;
+            access_log off;
+        }
+
+        # Cache CSS and JavaScript
+        location ~* \.(css|js)$ {
+            expires 1y;
+            add_header Cache-Control "public, immutable";
+            add_header X-Frame-Options "SAMEORIGIN" always;
+            add_header X-Content-Type-Options "nosniff" always;
+            add_header X-XSS-Protection "1; mode=block" always;
+        }
+
+        # Custom 404 page if it exists
+        error_page 404 /404.html;
+        location = /404.html {
+            internal;
+        }
+    }
+}