# frozen_string_literal: true

# AnalyticsController - Overview dashboard with statistics and charts
class AnalyticsController < ApplicationController
  # All actions require authentication

  def index
    authorize :analytics, :index?

    # Track overall request time
    request_start = Time.current

    # Time period selector (default: last 24 hours)
    @time_period = params[:period]&.to_sym || :day
    @start_time = calculate_start_time(@time_period)

    # Cache TTL based on time period
    cache_ttl = case @time_period
                when :hour then 5.minutes
                when :day then 1.hour
                when :week then 6.hours
                when :month then 12.hours
                else 1.hour
                end

    # Cache key includes period and start_time (hour-aligned for consistency)
    cache_key_base = "analytics/#{@time_period}/#{@start_time.to_i}"

    # Core statistics - cached (uses DuckDB if available)
    stat_start = Time.current
    @total_events = Rails.cache.fetch("#{cache_key_base}/total_events", expires_in: cache_ttl) do
      with_duckdb_fallback { EventDdb.count_since(@start_time) } ||
        Event.where("timestamp >= ?", @start_time).count
    end
    Rails.logger.info "[Analytics Perf] Total events: #{((Time.current - stat_start) * 1000).round(1)}ms"

    @total_rules = Rails.cache.fetch("analytics/total_rules", expires_in: 5.minutes) do
      Rule.enabled.count
    end

    @network_ranges_with_events = Rails.cache.fetch("analytics/network_ranges_with_events", expires_in: 5.minutes) do
      NetworkRange.with_events.count
    end

    @total_network_ranges = Rails.cache.fetch("analytics/total_network_ranges", expires_in: 5.minutes) do
      NetworkRange.count
    end

    # Event breakdown by action - cached (uses DuckDB if available)
    stat_start = Time.current
    @event_breakdown = Rails.cache.fetch("#{cache_key_base}/event_breakdown", expires_in: cache_ttl) do
      with_duckdb_fallback { EventDdb.breakdown_by_action(@start_time) } ||
        Event.where("timestamp >= ?", @start_time).group(:waf_action).count
    end
    Rails.logger.info "[Analytics Perf] Event breakdown: #{((Time.current - stat_start) * 1000).round(1)}ms"

    # Top countries by event count - cached (uses DuckDB if available)
    stat_start = Time.current
    @top_countries = Rails.cache.fetch("#{cache_key_base}/top_countries", expires_in: cache_ttl) do
      with_duckdb_fallback { EventDdb.top_countries(@start_time, 10) } ||
        Event.where("timestamp >= ? AND country IS NOT NULL", @start_time)
             .group(:country)
             .count
             .sort_by { |_, count| -count }
             .first(10)
    end
    Rails.logger.info "[Analytics Perf] Top countries: #{((Time.current - stat_start) * 1000).round(1)}ms"
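
    # NOTE: the `with_duckdb_fallback { ... } || <ActiveRecord query>` pattern
    # used above works because the helper returns nil when the DuckDB query
    # raises, which makes `||` evaluate the PostgreSQL branch. Since 0 and []
    # are truthy in Ruby, an empty-but-successful DuckDB result does not
    # re-run the query against PostgreSQL.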

    # Top blocked IPs - cached (uses DuckDB if available)
    stat_start = Time.current
    @top_blocked_ips = Rails.cache.fetch("#{cache_key_base}/top_blocked_ips", expires_in: cache_ttl) do
      with_duckdb_fallback { EventDdb.top_blocked_ips(@start_time, 10) } ||
        Event.where("timestamp >= ?", @start_time)
             .where(waf_action: 0) # deny action in enum
             .group(:ip_address)
             .count
             .sort_by { |_, count| -count }
             .first(10)
    end
    Rails.logger.info "[Analytics Perf] Top blocked IPs: #{((Time.current - stat_start) * 1000).round(1)}ms"

    # Network range intelligence breakdown - cached
    @network_intelligence = Rails.cache.fetch("analytics/network_intelligence", expires_in: 10.minutes) do
      {
        datacenter_ranges: NetworkRange.datacenter.count,
        vpn_ranges: NetworkRange.vpn.count,
        proxy_ranges: NetworkRange.proxy.count,
        total_ranges: NetworkRange.count
      }
    end

    # Recent activity - minimal cache for freshness
    @recent_events = Rails.cache.fetch("analytics/recent_events", expires_in: 1.minute) do
      Event.recent.limit(10).to_a
    end

    @recent_rules = Rails.cache.fetch("analytics/recent_rules", expires_in: 5.minutes) do
      Rule.order(created_at: :desc).limit(5).to_a
    end

    # System health indicators - cached
    @system_health = Rails.cache.fetch("#{cache_key_base}/system_health", expires_in: cache_ttl) do
      {
        total_users: User.count,
        active_rules: Rule.enabled.count,
        disabled_rules: Rule.where(enabled: false).count,
        recent_errors: Event.where("timestamp >= ? AND waf_action = ?", @start_time, 0).count # 0 = deny
      }
    end

    # Job queue statistics - short cache for near real-time
    @job_statistics = Rails.cache.fetch("analytics/job_statistics", expires_in: 30.seconds) do
      calculate_job_statistics
    end

    # Prepare data for charts - split caching for current vs historical data
    stat_start = Time.current
    @chart_data = prepare_chart_data_with_split_cache(cache_key_base, cache_ttl)
    Rails.logger.info "[Analytics Perf] Chart data: #{((Time.current - stat_start) * 1000).round(1)}ms"

    Rails.logger.info "[Analytics Perf] TOTAL REQUEST: #{((Time.current - request_start) * 1000).round(1)}ms"

    respond_to do |format|
      format.html
      format.turbo_stream
    end
  end
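
  # The DuckDB branch in #networks below expects EventDdb.top_networks to
  # return rows shaped like [network_range_id, event_count, unique_ips], e.g.
  # (hypothetical values):
  #   [[42, 1803, 97], [7, 950, 12]]
  # which stats_by_id converts into
  #   { 42 => { event_count: 1803, unique_ips: 97 }, ... }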

  def networks
    authorize :analytics, :index?

    # Time period selector (default: last 24 hours)
    @time_period = params[:period]&.to_sym || :day
    @start_time = calculate_start_time(@time_period)

    # Top networks by request volume - use DuckDB if available
    network_stats = with_duckdb_fallback { EventDdb.top_networks(@start_time, 50) }

    if network_stats
      # DuckDB path: array format [network_range_id, event_count, unique_ips]
      network_ids = network_stats.map { |row| row[0] }
      stats_by_id = network_stats.to_h { |row| [row[0], { event_count: row[1], unique_ips: row[2] }] }

      @top_networks = NetworkRange.where(id: network_ids).to_a.map do |network|
        stats = stats_by_id[network.id]
        network.define_singleton_method(:event_count) { stats[:event_count] }
        network.define_singleton_method(:unique_ips) { stats[:unique_ips] }

        # Add inherited intelligence support
        intelligence = network.inherited_intelligence
        if intelligence[:inherited]
          network.define_singleton_method(:display_company) { intelligence[:company] }
          network.define_singleton_method(:display_country) { intelligence[:country] }
          network.define_singleton_method(:inherited_from) { intelligence[:parent_cidr] }
          network.define_singleton_method(:has_inherited_data?) { true }
        else
          network.define_singleton_method(:display_company) { network.company }
          network.define_singleton_method(:display_country) { network.country }
          network.define_singleton_method(:inherited_from) { nil }
          network.define_singleton_method(:has_inherited_data?) { false }
        end

        network
      end.sort_by { |n| -n.event_count }
    else
      # PostgreSQL fallback
      event_stats = Event.where("timestamp >= ?", @start_time)
                         .where.not(network_range_id: nil)
                         .group(:network_range_id)
                         .select("network_range_id, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips")

      @top_networks = NetworkRange.joins("INNER JOIN (#{event_stats.to_sql}) stats ON stats.network_range_id = network_ranges.id")
                                  .select("network_ranges.*, stats.event_count, stats.unique_ips")
                                  .order("stats.event_count DESC")
                                  .limit(50)

      # Add inherited intelligence support for PostgreSQL fallback
      @top_networks = @top_networks.to_a.map do |network|
        intelligence = network.inherited_intelligence
        if intelligence[:inherited]
          network.define_singleton_method(:display_company) { intelligence[:company] }
          network.define_singleton_method(:display_country) { intelligence[:country] }
          network.define_singleton_method(:inherited_from) { intelligence[:parent_cidr] }
          network.define_singleton_method(:has_inherited_data?) { true }
        else
          network.define_singleton_method(:display_company) { network.company }
          network.define_singleton_method(:display_country) { network.country }
          network.define_singleton_method(:inherited_from) { nil }
          network.define_singleton_method(:has_inherited_data?) { false }
        end

        network
      end
    end

    # Network type breakdown with traffic stats
    @network_breakdown = calculate_network_type_stats(@start_time)

    # Company breakdown for top traffic sources - use DuckDB if available
    @top_companies = with_duckdb_fallback { EventDdb.top_companies(@start_time, 20) } ||
      Event.where("timestamp >= ? AND company IS NOT NULL", @start_time)
           .group(:company)
           .select("company, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips, COUNT(DISTINCT network_range_id) as network_count")
           .order("event_count DESC")
           .limit(20)

    # ASN breakdown - use DuckDB if available
    @top_asns = with_duckdb_fallback { EventDdb.top_asns(@start_time, 15) } ||
      Event.where("timestamp >= ? AND asn IS NOT NULL", @start_time)
           .group(:asn, :asn_org)
           .select("asn, asn_org, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips, COUNT(DISTINCT network_range_id) as network_count")
           .order("event_count DESC")
           .limit(15)

    # Geographic breakdown - use DuckDB if available
    @top_countries = with_duckdb_fallback { EventDdb.top_countries_with_stats(@start_time, 15) } ||
      Event.where("timestamp >= ? AND country IS NOT NULL", @start_time)
           .group(:country)
           .select("country, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips")
           .order("event_count DESC")
           .limit(15)

    # Suspicious network activity patterns
    @suspicious_patterns = calculate_suspicious_patterns(@start_time)

    respond_to do |format|
      format.html
      format.json { render json: network_analytics_json }
    end
  end
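
  # NOTE on the decoration above: both branches expose one interface to the
  # view layer. event_count and unique_ips come from singleton methods in the
  # DuckDB branch and from SQL select aliases in the PostgreSQL branch, while
  # display_company, display_country, inherited_from and has_inherited_data?
  # are attached as singleton methods in both. One caveat (a general Ruby
  # fact, not specific to this app): objects carrying singleton methods can't
  # be Marshal-dumped, so @top_networks shouldn't be written to a cache store
  # that serializes with Marshal.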

  private

  def calculate_start_time(period)
    # Snap to hour/day boundaries for cacheability.
    # Instead of rolling windows that change every second, use fixed boundaries.
    case period
    when :hour
      # Last complete hour: if it's 13:45, show 12:00-13:00
      1.hour.ago.beginning_of_hour
    when :day
      # Last 24 complete hours from current hour boundary
      24.hours.ago.beginning_of_hour
    when :week
      # Last 7 complete days from today's start
      7.days.ago.beginning_of_day
    when :month
      # Last 30 complete days from today's start
      30.days.ago.beginning_of_day
    else
      24.hours.ago.beginning_of_hour
    end
  end

  def prepare_chart_data_with_split_cache(cache_key_base, cache_ttl)
    # Split timeline into historical (completed hours) and current (incomplete hour).
    # Historical hours are cached for the full TTL, the current hour briefly for freshness.

    # Cache historical hours (1-23 hours ago) - these are complete and won't change.
    # No expiration - will stick around until evicted by the cache store (uses DuckDB if available).
    historical_timeline = Rails.cache.fetch("#{cache_key_base}/chart_historical") do
      historical_start = 23.hours.ago.beginning_of_hour
      current_hour_start = Time.current.beginning_of_hour

      events_by_hour = with_duckdb_fallback { EventDdb.hourly_timeline(historical_start, current_hour_start) } ||
        Event.where("timestamp >= ? AND timestamp < ?", historical_start, current_hour_start)
             .group("DATE_TRUNC('hour', timestamp)")
             .count

      (1..23).map do |hour_ago|
        hour_time = hour_ago.hours.ago.beginning_of_hour
        hour_key = hour_time.utc
        {
          time_iso: hour_time.iso8601,
          total: events_by_hour[hour_key] || 0
        }
      end
    end

    # Current hour (0 hours ago) - cache very briefly since it's actively accumulating.
    # ALWAYS use PostgreSQL for the current hour to get real-time data (DuckDB syncs every minute).
    current_hour_data = Rails.cache.fetch("#{cache_key_base}/chart_current_hour", expires_in: 1.minute) do
      hour_time = Time.current.beginning_of_hour
      count = Event.where("timestamp >= ?", hour_time).count
      { time_iso: hour_time.iso8601, total: count }
    end

    # Combine current + historical for the full 24-hour timeline
    timeline_data = [current_hour_data] + historical_timeline

    # Action distribution and other chart data (cached with the main cache)
    other_chart_data = Rails.cache.fetch("#{cache_key_base}/chart_metadata", expires_in: cache_ttl) do
      action_distribution = @event_breakdown.map do |action, count|
        {
          action: action.humanize,
          count: count,
          percentage: ((count.to_f / [@total_events, 1].max) * 100).round(1)
        }
      end

      {
        actions: action_distribution,
        countries: @top_countries.map { |country, count| { country: country, count: count } },
        network_types: [
          { type: "Datacenter", count: @network_intelligence[:datacenter_ranges] },
          { type: "VPN", count: @network_intelligence[:vpn_ranges] },
          { type: "Proxy", count: @network_intelligence[:proxy_ranges] },
          { type: "Standard", count: @network_intelligence[:total_ranges] -
                                     @network_intelligence[:datacenter_ranges] -
                                     @network_intelligence[:vpn_ranges] -
                                     @network_intelligence[:proxy_ranges] }
        ]
      }
    end

    # Merge timeline with other chart data
    other_chart_data.merge(timeline: timeline_data)
  end
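
  # Illustrative cache layout for the split strategy above (hypothetical key
  # base shown):
  #   analytics/day/1714471200/chart_historical   -> hours 1..23, no TTL
  #   analytics/day/1714471200/chart_current_hour -> hour 0, 1-minute TTL
  #   analytics/day/1714471200/chart_metadata     -> actions/countries/types, main TTL
  # The merged payload is [current hour] + [23 historical buckets], newest first.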

  def prepare_chart_data
    # Legacy method - kept for reference but no longer used

    # Events over time (hourly buckets) - use @start_time for consistency
    events_by_hour = Event.where("timestamp >= ?", @start_time)
                          .group("DATE_TRUNC('hour', timestamp)")
                          .count

    # Convert to chart format - snap to hour boundaries for cacheability
    timeline_data = (0..23).map do |hour_ago|
      # Use hour boundaries instead of rolling times
      hour_time = hour_ago.hours.ago.beginning_of_hour
      hour_key = hour_time.utc
      {
        # Store as ISO string for JavaScript to handle timezone conversion
        time_iso: hour_time.iso8601,
        total: events_by_hour[hour_key] || 0
      }
    end

    # Action distribution for pie chart
    action_distribution = @event_breakdown.map do |action, count|
      {
        action: action.humanize,
        count: count,
        percentage: ((count.to_f / [@total_events, 1].max) * 100).round(1)
      }
    end

    {
      timeline: timeline_data,
      actions: action_distribution,
      countries: @top_countries.map { |country, count| { country: country, count: count } },
      network_types: [
        { type: "Datacenter", count: @network_intelligence[:datacenter_ranges] },
        { type: "VPN", count: @network_intelligence[:vpn_ranges] },
        { type: "Proxy", count: @network_intelligence[:proxy_ranges] },
        { type: "Standard", count: @network_intelligence[:total_ranges] -
                                   @network_intelligence[:datacenter_ranges] -
                                   @network_intelligence[:vpn_ranges] -
                                   @network_intelligence[:proxy_ranges] }
      ]
    }
  end

  def calculate_network_type_stats(start_time)
    # Try DuckDB first, fall back to PostgreSQL
    duckdb_stats = with_duckdb_fallback { EventDdb.network_type_stats(start_time) }
    return duckdb_stats if duckdb_stats

    # PostgreSQL fallback
    # Get all network types with their traffic statistics using denormalized columns
    network_types = [
      { type: 'datacenter', label: 'Datacenter', column: :is_datacenter },
      { type: 'vpn', label: 'VPN', column: :is_vpn },
      { type: 'proxy', label: 'Proxy', column: :is_proxy }
    ]

    results = {}
    total_events = Event.where("timestamp >= ?", start_time).count

    network_types.each do |network_type|
      # Query events directly using the denormalized flags
      event_stats = Event.where("timestamp >= ? AND #{network_type[:column]} = ?", start_time, true)
                         .select("COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips, COUNT(DISTINCT network_range_id) as network_count")
                         .reorder(nil)
                         .take

      results[network_type[:type]] = {
        label: network_type[:label],
        networks: event_stats.network_count || 0,
        events: event_stats.event_count || 0,
        unique_ips: event_stats.unique_ips || 0,
        percentage: total_events > 0 ? ((event_stats.event_count.to_f / total_events) * 100).round(1) : 0
      }
    end

    # Calculate standard networks (everything else)
    standard_stats = Event.where("timestamp >= ? AND is_datacenter = ? AND is_vpn = ? AND is_proxy = ?",
                                 start_time, false, false, false)
                          .select("COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips, COUNT(DISTINCT network_range_id) as network_count")
                          .take

    results['standard'] = {
      label: 'Standard',
      networks: standard_stats.network_count || 0,
      events: standard_stats.event_count || 0,
      unique_ips: standard_stats.unique_ips || 0,
      percentage: total_events > 0 ? ((standard_stats.event_count.to_f / total_events) * 100).round(1) : 0
    }

    results
  end
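
  # For reference, each per-type aggregate above compiles to roughly this SQL
  # (illustrative, datacenter case):
  #   SELECT COUNT(*) AS event_count,
  #          COUNT(DISTINCT ip_address) AS unique_ips,
  #          COUNT(DISTINCT network_range_id) AS network_count
  #   FROM events
  #   WHERE timestamp >= $1 AND is_datacenter = TRUE;
  # Interpolating #{network_type[:column]} into the WHERE clause is safe only
  # because the column names come from the hard-coded list above, never from
  # user input.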

  def calculate_suspicious_patterns(start_time)
    # Try DuckDB first, fall back to PostgreSQL
    duckdb_patterns = with_duckdb_fallback { EventDdb.suspicious_patterns(start_time) }
    return duckdb_patterns if duckdb_patterns

    # PostgreSQL fallback
    patterns = {}

    # High volume networks (more than 5x the average events per network) - using denormalized network_range_id
    total_networks = Event.where("timestamp >= ? AND network_range_id IS NOT NULL", start_time)
                          .distinct.count(:network_range_id)

    if total_networks > 0
      avg_events_per_network = Event.where("timestamp >= ?", start_time).count / total_networks

      high_volume_networks = Event.where("timestamp >= ? AND network_range_id IS NOT NULL", start_time)
                                  .group(:network_range_id)
                                  .having("COUNT(*) > ?", avg_events_per_network * 5)
                                  .count

      patterns[:high_volume] = {
        count: high_volume_networks.count,
        networks: high_volume_networks.keys
      }
    else
      patterns[:high_volume] = { count: 0, networks: [] }
    end

    # Networks with high deny rates (> 50% blocked requests) - using denormalized network_range_id
    high_deny_networks = Event.where("timestamp >= ? AND network_range_id IS NOT NULL", start_time)
                              .group(:network_range_id)
                              .select("network_range_id, COUNT(CASE WHEN waf_action = 0 THEN 1 END) as denied_count, COUNT(*) as total_count")
                              .having("COUNT(CASE WHEN waf_action = 0 THEN 1 END)::float / COUNT(*) > 0.5")
                              .having("COUNT(*) >= 10") # minimum threshold

    patterns[:high_deny_rate] = {
      count: high_deny_networks.length,
      network_ids: high_deny_networks.map(&:network_range_id)
    }

    # Companies appearing with multiple IPs (potential botnets) - using denormalized company column
    company_subnets = Event.where("timestamp >= ? AND company IS NOT NULL", start_time)
                           .group(:company)
                           .select("company, COUNT(DISTINCT ip_address) as ip_count")
                           .having("COUNT(DISTINCT ip_address) > 5")
                           .order("ip_count DESC")
                           .limit(10)

    patterns[:distributed_companies] = company_subnets.map do |stat|
      { company: stat.company, subnets: stat.ip_count }
    end

    patterns
  end

  def network_analytics_json
    {
      top_networks: @top_networks.map { |network|
        {
          id: network.id,
          cidr: network.cidr,
          company: network.display_company,
          asn: network.asn,
          country: network.display_country,
          network_type: network.network_type,
          event_count: network.event_count,
          unique_ips: network.unique_ips,
          has_inherited_data: network.has_inherited_data?,
          inherited_from: network.inherited_from
        }
      },
      network_breakdown: @network_breakdown,
      top_companies: @top_companies,
      top_asns: @top_asns,
      top_countries: @top_countries,
      suspicious_patterns: @suspicious_patterns
    }
  end

  def calculate_job_statistics
    # Get job queue information from SolidQueue
    total_jobs = SolidQueue::Job.count
    pending_jobs = SolidQueue::Job.where(finished_at: nil).count

    # Get jobs by queue name
    queue_breakdown = SolidQueue::Job.group(:queue_name).count

    # Get recent job activity
    recent_enqueued = SolidQueue::Job.where('created_at > ?', 1.hour.ago).count

    # Calculate health status - check the higher threshold first so that
    # 'critical' is actually reachable
    health_status = if pending_jobs > 500
                      'critical'
                    elsif pending_jobs > 100
                      'warning'
                    else
                      'healthy'
                    end

    {
      total_jobs: total_jobs,
      pending_jobs: pending_jobs,
      recent_enqueued: recent_enqueued,
      queue_breakdown: queue_breakdown,
      health_status: health_status
    }
  rescue => e
    Rails.logger.error "Failed to calculate job statistics: #{e.message}"
    {
      total_jobs: 0,
      pending_jobs: 0,
      recent_enqueued: 0,
      queue_breakdown: {},
      health_status: 'error',
      error: e.message
    }
  end

  # Helper method to try DuckDB first, fall back to PostgreSQL
  def with_duckdb_fallback
    yield # DuckDB result passes through; nil propagates to trigger the fallback
  rescue StandardError => e
    Rails.logger.warn "[Analytics] DuckDB query failed, falling back to PostgreSQL: #{e.message}"
    nil # return nil to trigger fallback
  end
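
  # Hypothetical usage sketch for the helper above, mirroring the call sites:
  #   with_duckdb_fallback { EventDdb.count_since(1.day.ago) } ||
  #     Event.where("timestamp >= ?", 1.day.ago).count
  # Any StandardError raised inside the block is logged and swallowed, so the
  # right-hand ActiveRecord query is the worst case rather than an exception.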

  # Check if DuckDB has recent data (within the last 2 minutes).
  # Returns true if DuckDB is up-to-date, false if potentially stale.
  def duckdb_is_fresh?
    newest = AnalyticsDuckdbService.instance.newest_event_timestamp
    return false if newest.nil?

    # Consider fresh if the newest event is within 2 minutes
    # (the sync job runs every 1 minute, so 2 minutes allows for some lag)
    newest >= 2.minutes.ago
  rescue StandardError => e
    Rails.logger.warn "[Analytics] Error checking DuckDB freshness: #{e.message}"
    false
  end
end