# frozen_string_literal: true

# One-time job to bootstrap Parquet export system
# Exports all existing DuckDB data to weekly Parquet archives
# Run this once when setting up Parquet exports for the first time
#
# Usage:
#   BootstrapParquetExportJob.perform_now
#
# or via docker:
#   docker compose exec jobs bin/rails runner "BootstrapParquetExportJob.perform_now"
class BootstrapParquetExportJob < ApplicationJob
  queue_as :default

  # Exports all events currently in DuckDB into weekly Parquet archives.
  # Aborts early (with a warning) when DuckDB is empty; pre-existing week
  # archives are logged but do not stop the export. Any StandardError is
  # logged and re-raised so the job backend records the run as failed.
  def perform
    service = AnalyticsDuckdbService.instance

    # Nothing to export until the DuckDB mirror has been populated.
    event_count = service.event_count
    Rails.logger.info "[Parquet Bootstrap] DuckDB event count: #{event_count}"

    if event_count.zero?
      Rails.logger.warn "[Parquet Bootstrap] No events in DuckDB. Run SyncEventsToDuckdbJob first."
      return
    end

    # Informational only — presumably export_all_to_parquet overwrites
    # existing archives itself (TODO confirm against the service).
    existing_weeks = week_archive_count
    if existing_weeks.positive?
      Rails.logger.info "[Parquet Bootstrap] Found #{existing_weeks} existing week archives"
    end

    Rails.logger.info "[Parquet Bootstrap] Starting export of all DuckDB data to Parquet..."
    # Monotonic clock: duration stays correct even if the wall clock is
    # adjusted (NTP, DST) while a long export runs.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    # Run the bootstrap export
    service.export_all_to_parquet

    duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

    Rails.logger.info "[Parquet Bootstrap] Complete!"
    Rails.logger.info "[Parquet Bootstrap] - Time taken: #{duration.round(2)} seconds"
    Rails.logger.info "[Parquet Bootstrap] - Week archives: #{week_archive_count}"
    Rails.logger.info "[Parquet Bootstrap] - Storage: #{AnalyticsDuckdbService::PARQUET_BASE_PATH}"
    Rails.logger.info "[Parquet Bootstrap] System is ready - jobs will maintain exports automatically"
  rescue StandardError => e
    Rails.logger.error "[Parquet Bootstrap] Job failed: #{e.message}"
    Rails.logger.error e.backtrace.join("\n")
    raise # Re-raise to mark job as failed
  end

  private

  # Number of weekly Parquet archive files currently on disk.
  def week_archive_count
    Dir.glob(AnalyticsDuckdbService::PARQUET_WEEKS_PATH.join("*.parquet")).size
  end
end