diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..4fa21de
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,32 @@
+# Git
+.git/
+.gitignore
+.github/
+
+# HTTrack artifacts (not part of the actual site)
+hts-cache/
+hts-log.txt
+cookies.txt
+index.html
+backblue.gif
+fade.gif
+
+# Documentation
+README.md
+*.md
+
+# Docker
+Dockerfile
+.dockerignore
+docker-compose.yml
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS
+.DS_Store
+Thumbs.db
diff --git a/.gitea/workflows/build.yml b/.gitea/workflows/build.yml
new file mode 100644
index 0000000..e5d5e64
--- /dev/null
+++ b/.gitea/workflows/build.yml
@@ -0,0 +1,58 @@
+name: Build and Push Docker Image
+
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+env:
+  IMAGE_NAME: karaokepedia
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      # The workspace starts empty; without a checkout `docker build .`
+      # has no Dockerfile or site content to build from.
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Build Docker image
+        run: |
+          echo "Building Docker image..."
+          docker build -t karaokepedia:latest .
+          docker tag karaokepedia:latest karaokepedia:${{ github.sha }}
+          echo "✅ Image built successfully"
+
+      - name: Log in to Docker Hub
+        # Secrets are passed through the environment rather than being
+        # interpolated into the script text.
+        env:
+          DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
+          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
+        run: |
+          # Skip (rather than fail) when credentials are absent so the
+          # push step can report that the push was skipped.
+          if [ -n "$DOCKER_USERNAME" ] && [ -n "$DOCKER_PASSWORD" ]; then
+            echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
+          else
+            echo "⚠️ Docker Hub credentials not configured, skipping login"
+          fi
+
+      - name: Push Docker image
+        env:
+          DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
+          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
+        run: |
+          # Push with registry prefix if secrets are configured
+          if [ -n "$DOCKER_USERNAME" ] && [ -n "$DOCKER_PASSWORD" ]; then
+            REMOTE_IMAGE="${DOCKER_USERNAME}/${IMAGE_NAME}"
+            docker tag karaokepedia:latest "${REMOTE_IMAGE}:latest"
+            docker tag karaokepedia:latest "${REMOTE_IMAGE}:main-${{ github.sha }}"
+            docker push "${REMOTE_IMAGE}:latest"
+            docker push "${REMOTE_IMAGE}:main-${{ github.sha }}"
+            echo "✅ Pushed to Docker Hub: ${REMOTE_IMAGE}"
+          else
+            echo "⚠️ Docker Hub credentials not configured, skipping push"
+          fi
diff --git a/.gitea/workflows/pr-validation.yml b/.gitea/workflows/pr-validation.yml
new file mode 100644
index 0000000..68e5f82
--- 
/dev/null
+++ b/.gitea/workflows/pr-validation.yml
@@ -0,0 +1,203 @@
+name: PR - Pull Request Validation
+
+on:
+  pull_request:
+    types: [opened, edited, synchronize, reopened]
+
+jobs:
+  validate-pr-title:
+    runs-on: ubuntu-latest
+    steps:
+      - name: PR - Validate PR title follows Conventional Commits
+        # The title is contributor-controlled, so it is passed through the
+        # environment instead of being expanded into the script text.
+        env:
+          PR_TITLE: ${{ github.event.pull_request.title }}
+        run: |
+          echo "Validating PR title: $PR_TITLE"
+
+          # Conventional Commits pattern: type(scope): description
+          # Types: feat, fix, docs, style, refactor, perf, test, chore, ci, build, revert
+          PATTERN="^(feat|fix|docs|style|refactor|perf|test|chore|ci|build|revert)(\(.+\))?: .+"
+
+          if printf '%s\n' "$PR_TITLE" | grep -qE "$PATTERN"; then
+            echo "✅ PR title follows Conventional Commits format"
+            exit 0
+          else
+            echo "❌ PR title does not follow Conventional Commits format"
+            echo ""
+            echo "Expected format: <type>(optional scope): <description>"
+            echo ""
+            echo "Valid types:"
+            echo " - feat: A new feature"
+            echo " - fix: A bug fix"
+            echo " - docs: Documentation only changes"
+            echo " - style: Code style changes (formatting, missing semi-colons, etc)"
+            echo " - refactor: Code change that neither fixes a bug nor adds a feature"
+            echo " - perf: Performance improvements"
+            echo " - test: Adding or updating tests"
+            echo " - chore: Changes to build process or auxiliary tools"
+            echo " - ci: CI/CD configuration changes"
+            echo " - build: Changes to build system or dependencies"
+            echo " - revert: Reverts a previous commit"
+            echo ""
+            echo "Examples:"
+            echo " - feat: add user authentication"
+            echo " - fix(api): handle null pointer exception"
+            echo " - docs: update installation instructions"
+            echo " - ci: add Docker build workflow"
+            exit 1
+          fi
+
+  validate-docker:
+    runs-on: ubuntu-latest
+    steps:
+      # Every job starts in an empty workspace; the file checks below
+      # need the repository contents.
+      - name: PR - Check out repository
+        uses: actions/checkout@v4
+
+      - name: PR - Validate Dockerfile syntax
+        run: |
+          if [ -f "Dockerfile" ]; then
+            echo "✅ Dockerfile exists"
+            # Build once and keep the log so that a failing build fails
+            # this step instead of being masked by `| head -5 || true`.
+            BUILD_LOG="$(mktemp)"
+            if docker build --no-cache -f Dockerfile . > "$BUILD_LOG" 2>&1; then
+              head -5 "$BUILD_LOG"
+              echo "✅ Dockerfile syntax appears valid"
+            else
+              cat "$BUILD_LOG"
+              echo "❌ Dockerfile failed to build"
+              exit 1
+            fi
+          else
+            echo "❌ Dockerfile not found"
+            exit 1
+          fi
+
+      - name: PR - Check .dockerignore exists
+        run: |
+          if [ -f ".dockerignore" ]; then
+            echo "✅ .dockerignore exists"
+          else
+            echo "⚠️ Warning: .dockerignore not found (recommended)"
+          fi
+
+      - name: PR - Validate nginx config
+        run: |
+          if [ -f "nginx.conf" ]; then
+            echo "✅ nginx.conf exists"
+            # Test nginx config syntax using alpine nginx image
+            docker run --rm -v "$(pwd)/nginx.conf:/etc/nginx/nginx.conf:ro" nginx:alpine nginx -t
+            echo "✅ nginx.conf syntax is valid"
+          else
+            echo "⚠️ Warning: nginx.conf not found"
+          fi
+
+  build-test:
+    runs-on: ubuntu-latest
+    steps:
+      - name: PR - Check out repository
+        uses: actions/checkout@v4
+
+      - name: PR - Build and test Docker image
+        run: |
+          docker build -t karaokepedia:test .
+          CONTAINER_ID=$(docker run -d -p 8080:80 karaokepedia:test)
+          echo "Container started: $CONTAINER_ID"
+
+          # Always remove the container, including when a check fails
+          trap 'docker rm -f "$CONTAINER_ID" > /dev/null 2>&1 || true' EXIT
+
+          # Wait (up to ~15 s) for the container to answer on port 8080
+          echo "Waiting for container to start..."
+          for _ in $(seq 1 15); do
+            if curl -f -s -o /dev/null http://localhost:8080/; then
+              break
+            fi
+            sleep 1
+          done
+
+          # Test homepage
+          if curl -f -s -o /dev/null http://localhost:8080/; then
+            echo "✅ Homepage loads successfully"
+          else
+            echo "❌ Homepage failed to load"
+            docker logs "$CONTAINER_ID"
+            exit 1
+          fi
+
+          # Test songs page
+          if curl -f -s -o /dev/null http://localhost:8080/songs.html; then
+            echo "✅ Songs page loads successfully"
+          else
+            echo "❌ Songs page failed to load"
+            docker logs "$CONTAINER_ID"
+            exit 1
+          fi
+
+          # Test assets
+          if curl -f -s -o /dev/null http://localhost:8080/assets/application-b504973bc673ef9f09352588c7d791495f9fa7b652e6bf0d71ea86a094aa4007.css; then
+            echo "✅ CSS assets load successfully"
+          else
+            echo "❌ CSS assets failed to load"
+            docker logs "$CONTAINER_ID"
+            exit 1
+          fi
+
+          echo "✅ All container tests passed"
+
+  check-files:
+    runs-on: ubuntu-latest
+    steps:
+      - name: PR - Check out repository
+        uses: actions/checkout@v4
+
+      - name: PR - Check required files exist
+        run: |
+          REQUIRED_FILES=(
+            "karaoke.karaniwan.org/index.html"
+            "karaoke.karaniwan.org/songs.html"
+            "karaoke.karaniwan.org/assets/application-b504973bc673ef9f09352588c7d791495f9fa7b652e6bf0d71ea86a094aa4007.css"
+            "Dockerfile"
+            "nginx.conf"
+            ".dockerignore"
+          )
+
+          MISSING_FILES=()
+          for file in "${REQUIRED_FILES[@]}"; do
+            if [ -f "$file" ]; then
+              echo "✅ $file exists"
+            else
+              echo "❌ $file is missing"
+              MISSING_FILES+=("$file")
+            fi
+          done
+
+          if [ ${#MISSING_FILES[@]} -gt 0 ]; then
+            echo ""
+            echo "❌ Missing required files:"
+            printf ' - %s\n' "${MISSING_FILES[@]}"
+            exit 1
+          fi
+
+          echo ""
+          echo "✅ All required files present"
+
+      - name: PR - Check for HTTrack artifacts in content
+        run: |
+          echo "Checking that HTTrack artifacts are not included in Docker context..."
+
+          # These should be in .dockerignore
+          if [ -d "hts-cache" ]; then
+            if grep -q "hts-cache" .dockerignore; then
+              echo "✅ hts-cache/ directory excluded in .dockerignore"
+            else
+              echo "❌ hts-cache/ should be in .dockerignore"
+              exit 1
+            fi
+          fi
+
+          echo "✅ HTTrack artifacts properly excluded"
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
new file mode 100644
index 0000000..9e07824
--- /dev/null
+++ b/.github/copilot-instructions.md
@@ -0,0 +1,137 @@
+# Karaokepedia - Static Website Archive
+
+## Project Overview
+This is a **static HTML archive** of Karaokepedia (karaoke.karaniwan.org), a semi-crowdsourced karaoke database focused on Japanese/anime songs available in Philippine karaoke machines. The site was deprecated and migrated to awitotaku.com, but this archive preserves the original content for reference.
+
+**Status**: HTTrack mirror from December 29, 2025. Dockerized with nginx:alpine, automated builds via Gitea Actions.
+ +## Architecture + +### Static Content +- **Pure Static HTML**: All files in `karaoke.karaniwan.org/` are pre-rendered HTML pages (~17.6 MB, 2,309 files) +- **No backend**: HTTrack mirror of Ruby on Rails app (visible in HTML comments), but this is static HTML only +- **Assets included**: 2 MB of CSS/JS/fonts/images stay in-repo (self-contained archive) + +### Container Stack +- **Base image**: nginx:alpine (~25 MB) +- **Web server**: Custom nginx.conf with font MIME types, gzip, caching +- **Security**: Runs as non-root user, includes healthcheck +- **Final size**: ~35-40 MB total +- **CI/CD**: Gitea Actions builds on push to main + +## Structure +``` +. +├── karaoke.karaniwan.org/ # Static HTML content (served by nginx) +│ ├── songs/ # Individual song pages +│ ├── songs*.html # Paginated listings (hex-named) +│ ├── artists/ # Artist profiles +│ ├── karaoke_machines/ # Machine-specific listings +│ ├── tags/ # Tag-based grouping +│ ├── assets/ # CSS/JS/images/fonts (~2 MB) +│ └── index.html # Main entry point +├── Dockerfile # nginx:alpine with custom config +├── nginx.conf # Custom nginx configuration +├── .dockerignore # Excludes HTTrack artifacts +├── .gitea/workflows/build.yml # CI/CD pipeline +├── hts-cache/ # HTTrack metadata (not in image) +├── hts-log.txt # HTTrack log (not in image) +└── index.html # HTTrack root nav (not in image) +``` + +## Data Model (Static) +Each song page includes: +- **Song title** and artist (linked) +- **Machine keys**: KY (Kumyoung), TJ (TJ Media), P (Platinum) with numeric codes +- **Tags**: Language (Japanese/English/Korean/OPM), genre (Pop/Rock/Metal), type (Anime OST/Drama OST) +- **Release dates** and alternative names +- Bootstrap 3 responsive layout + +**File naming**: Hex-based (e.g., `songs9285.html`, `songs02d1.html`) for pagination/organization. 
+ +## Development & Deployment + +### Local Testing +```bash +# Quick test with Python +python3 -m http.server 8000 +# Visit http://localhost:8000/karaoke.karaniwan.org/ + +# Docker build and run +docker build -t karaokepedia:test . +docker run -p 8080:80 karaokepedia:test +# Visit http://localhost:8080/ + +# Check healthcheck +docker inspect --format='{{.State.Health.Status}}' <container-id> +``` + +### Building for Production +```bash +# Build with tags +docker build -t karaokepedia:latest . +docker tag karaokepedia:latest your-registry/karaokepedia:latest + +# Push to registry +docker push your-registry/karaokepedia:latest +``` + +### CI/CD Pipelines (Gitea Actions) + +#### Build & Deploy (`.gitea/workflows/build.yml`) +- **Trigger**: Push to `main` branch or manual dispatch +- **Steps**: Checkout → Setup Buildx → Login to registry → Build & push → Output digest +- **Tags**: `:latest` and `:main-<sha>` +- **Registry**: Configure via secrets (DOCKER_USERNAME/DOCKER_PASSWORD for Docker Hub, or adapt for Gitea registry) + +#### PR Validation (`.gitea/workflows/pr-validation.yml`) +- **Trigger**: Pull request opened, edited, synchronized, or reopened +- **Jobs**: + - `validate-pr-title`: Enforces [Conventional Commits](https://www.conventionalcommits.org/) format + - `validate-docker`: Checks Dockerfile, nginx.conf, and .dockerignore syntax + - `build-test`: Builds image and tests container starts, pages load, assets accessible + - `check-files`: Verifies required files exist, HTTrack artifacts excluded + +**Conventional Commit Types**: `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `chore`, `ci`, `build`, `revert` + +**Example PR titles**: +- ✅ `feat: add user authentication` +- ✅ `fix(docker): correct nginx config path` +- ✅ `docs: update README with deployment steps` +- ❌ `Added new feature` (missing type) +- ❌ `Update files` (not descriptive) + +### Registry Configuration +Edit `.gitea/workflows/build.yml` and uncomment the appropriate registry: +- **Docker Hub** 
(default): Uses `DOCKER_USERNAME` and `DOCKER_PASSWORD` secrets +- **GitHub Container Registry**: Uncomment GHCR section, uses `GITHUB_TOKEN` +- **Gitea Container Registry**: Uncomment Gitea section, configure domain and credentials + +## Modifying Content +Since this is static HTML: +1. **Edit HTML directly** - no templating system +2. **Manual updates** across paginated files (songs*.html) if needed +3. **No regeneration tool** - changes must be applied per-file + +## Navigation Conventions +- **Internal links** are relative (e.g., `../songs/`, `../../artists/`) +- **Asset references** use `assets/application-*.css` and `assets/application-*.js` (fingerprinted) +- **External services**: Disqus comments (disabled), Piwik analytics (historic) + +## Key Patterns +- **Machine abbreviations**: KY=Kumyoung (red label), TJ=TJ Media (orange label), P=Platinum (no color) +- **Alphabetical pagination**: `songs.html?initial=A`, `songs6c50.html?initial=A` +- **Deprecated notices**: All pages show "This site has been deprecated. 
Proceed to Awit Otaku"
+
+## HTTrack Artifacts
+- `hts-cache/`: Mirror metadata (new.lst lists all mirrored URLs)
+- `hts-log.txt`: Download log (2312 links, 2309 files, 2 404 errors)
+- `cookies.txt`: Session cookies from mirroring
+- Root `index.html`: HTTrack navigation page
+
+## What NOT to Do
+- Don't look for package.json, Gemfile, or build configs - they don't exist
+- Don't try to "npm install" or "bundle install" - this isn't a dev project
+- Don't modify HTTrack files (hts-cache/, hts-log.txt) - they're archival metadata
+- Don't expect dynamic search/filtering - all functionality is static HTML links
+- Don't include HTTrack artifacts in Docker image - they're excluded via .dockerignore
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..1c17ae7
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,36 @@
+# Single-stage image: nginx serving the mirrored static site
+FROM nginx:alpine
+
+# Install dumb-init for proper signal handling
+RUN apk add --no-cache dumb-init
+
+# Copy custom nginx configuration
+COPY nginx.conf /etc/nginx/nginx.conf
+
+# Copy static site content to nginx html directory
+COPY karaoke.karaniwan.org/ /usr/share/nginx/html/
+
+# Hand every path nginx writes to over to the nginx user (the user is
+# not created here, so it must already exist in the base image)
+RUN chown -R nginx:nginx /usr/share/nginx/html && \
+    chown -R nginx:nginx /var/cache/nginx && \
+    chown -R nginx:nginx /var/log/nginx && \
+    touch /var/run/nginx.pid && \
+    chown -R nginx:nginx /var/run/nginx.pid
+
+# Switch to non-root user
+USER nginx
+
+# Expose port 80
+EXPOSE 80
+
+# Add healthcheck. 127.0.0.1 is used instead of localhost because
+# nginx.conf listens on IPv4 only and localhost may resolve to ::1.
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD wget --no-verbose --tries=1 --spider http://127.0.0.1/ || exit 1
+
+# Use dumb-init to handle signals properly
+ENTRYPOINT ["/usr/bin/dumb-init", "--"]
+
+# Start nginx
+CMD ["nginx", "-g", "daemon off;"]
diff --git a/nginx.conf b/nginx.conf
new file mode 100644
index 0000000..3e7a1ce
--- /dev/null
+++ b/nginx.conf
@@ -0,0 +1,112 @@
+# nginx runs as the unprivileged "nginx" user in this image (see the
+# Dockerfile's USER line); the `user` directive below only takes effect
+# when the master process is started as root.
+user nginx;
+worker_processes auto;
+error_log /var/log/nginx/error.log warn;
+pid /var/run/nginx.pid;
+
+events {
+    worker_connections 1024;
+}
+
+http {
+    include /etc/nginx/mime.types;
+    default_type application/octet-stream;
+
+    # Logging
+    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
+                    '$status $body_bytes_sent "$http_referer" '
+                    '"$http_user_agent" "$http_x_forwarded_for"';
+    access_log /var/log/nginx/access.log main;
+
+    # Performance
+    sendfile on;
+    tcp_nopush on;
+    tcp_nodelay on;
+    keepalive_timeout 65;
+    types_hash_max_size 2048;
+
+    # Gzip compression
+    gzip on;
+    gzip_vary on;
+    gzip_proxied any;
+    gzip_comp_level 6;
+    gzip_types text/plain text/css text/xml text/javascript
+               application/json application/javascript application/xml+rss
+               application/x-javascript application/xhtml+xml
+               application/font-woff application/font-woff2
+               font/woff font/woff2 font/ttf font/eot
+               image/svg+xml;
+
+    # Font MIME types added on top of the included mime.types. The stock
+    # file of current nginx releases already maps woff, woff2 and eot, so
+    # redeclaring them here only produced "duplicate extension" warnings.
+    types {
+        font/ttf ttf;
+        font/otf otf;
+    }
+
+    server {
+        listen 80;
+        server_name _;
+        root /usr/share/nginx/html;
+        index index.html;
+
+        # Security headers. add_header is inherited only by locations that
+        # declare no add_header of their own, so the caching locations
+        # below repeat these three lines next to their Cache-Control.
+        add_header X-Frame-Options "SAMEORIGIN" always;
+        add_header X-Content-Type-Options "nosniff" always;
+        add_header X-XSS-Protection "1; mode=block" always;
+
+        # Deny access to hidden files. Listed before the caching locations
+        # because regex locations are tried in order and a dotfile with a
+        # cached extension would otherwise be served by one of them.
+        location ~ /\. {
+            deny all;
+            access_log off;
+            log_not_found off;
+        }
+
+        # Main location
+        location / {
+            try_files $uri $uri/ =404;
+        }
+
+        # Cache static assets
+        location ~* ^/assets/ {
+            expires 1y;
+            add_header Cache-Control "public, immutable";
+            add_header X-Frame-Options "SAMEORIGIN" always;
+            add_header X-Content-Type-Options "nosniff" always;
+            add_header X-XSS-Protection "1; mode=block" always;
+            access_log off;
+        }
+
+        # Cache images and fonts
+        location ~* \.(jpg|jpeg|png|gif|ico|svg|woff|woff2|ttf|eot)$ {
+            expires 1y;
+            add_header Cache-Control "public, immutable";
+            add_header X-Frame-Options "SAMEORIGIN" always;
+            add_header X-Content-Type-Options "nosniff" always;
+            add_header X-XSS-Protection "1; mode=block" always;
+            access_log off;
+        }
+
+        # Cache CSS and JavaScript
+        location ~* \.(css|js)$ {
+            expires 1y;
+            add_header Cache-Control "public, immutable";
+            add_header X-Frame-Options "SAMEORIGIN" always;
+            add_header X-Content-Type-Options "nosniff" always;
+            add_header X-XSS-Protection "1; mode=block" always;
+        }
+
+        # Custom 404 page if it exists
+        error_page 404 /404.html;
+        location = /404.html {
+            internal;
+        }
+    }
+}