Compare commits
2 commits: de1cf0b237...3f274c842c

- 3f274c842c
- ef56779584
Dockerfile (19 lines changed)
@@ -14,10 +14,25 @@ FROM docker.io/library/ruby:$RUBY_VERSION-slim AS base
 # Rails app lives here
 WORKDIR /rails
 
-# Install base packages
+# Install base packages and DuckDB library
+ARG TARGETPLATFORM
 RUN apt-get update -qq && \
-    apt-get install --no-install-recommends -y curl libjemalloc2 libvips sqlite3 && \
+    apt-get install --no-install-recommends -y curl libjemalloc2 libvips sqlite3 wget unzip && \
     ln -s /usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2 /usr/local/lib/libjemalloc.so && \
+    case "$TARGETPLATFORM" in \
+      "linux/amd64") \
+        DUCKDB_ARCH="amd64" ;; \
+      "linux/arm64") \
+        DUCKDB_ARCH="arm64" ;; \
+      *) \
+        echo "Unsupported platform: $TARGETPLATFORM" && exit 1 ;; \
+    esac && \
+    wget "https://install.duckdb.org/v1.4.2/libduckdb-linux-${DUCKDB_ARCH}.zip" -O /tmp/libduckdb.zip && \
+    unzip /tmp/libduckdb.zip -d /tmp/duckdb && \
+    cp /tmp/duckdb/duckdb.h /tmp/duckdb/duckdb.hpp /usr/local/include/ && \
+    cp /tmp/duckdb/libduckdb.so /usr/local/lib/ && \
+    ldconfig && \
+    rm -rf /tmp/libduckdb.zip /tmp/duckdb && \
     rm -rf /var/lib/apt/lists /var/cache/apt/archives
 
 # Set production environment variables and enable jemalloc for reduced memory usage and latency.
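A quick way to confirm this wiring, sketched here as an illustrative check rather than part of the commit: boot a Ruby shell inside the built image and query DuckDB through the gem that the Gemfile change below adds.

    # Illustrative smoke test (assumes the duckdb gem from the Gemfile below).
    # It exercises the libduckdb.so that the RUN step above installs; if the
    # library is missing or ldconfig was skipped, the require fails at load time.
    require "duckdb"

    db  = DuckDB::Database.open   # in-memory database, no file argument needed
    con = db.connect
    con.query("SELECT version()").each { |row| puts row.first }
    con.close
    db.close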
Gemfile (2 lines changed)
@@ -94,3 +94,5 @@ end
 gem "sentry-rails", "~> 6.1"
 
 gem "postgresql_cursor", "~> 0.6.9"
+
+gem "duckdb", "~> 1.4"
Gemfile.lock
@@ -116,6 +116,8 @@ GEM
     device_detector (1.1.3)
     dotenv (3.1.8)
     drb (2.2.3)
+    duckdb (1.4.1.1)
+      bigdecimal (>= 3.1.4)
     ed25519 (1.4.0)
     email_validator (2.2.4)
       activemodel
@@ -488,6 +490,7 @@ DEPENDENCIES
   countries
   debug
   device_detector
+  duckdb (~> 1.4)
   httparty
   image_processing (~> 1.2)
   importmap-rails
app/controllers/analytics_controller.rb
@@ -23,9 +23,10 @@ class AnalyticsController < ApplicationController
     # Cache key includes period and start_time (hour-aligned for consistency)
     cache_key_base = "analytics/#{@time_period}/#{@start_time.to_i}"
 
-    # Core statistics - cached
+    # Core statistics - cached (uses DuckDB if available)
     @total_events = Rails.cache.fetch("#{cache_key_base}/total_events", expires_in: cache_ttl) do
-      Event.where("timestamp >= ?", @start_time).count
+      with_duckdb_fallback { EventDdb.count_since(@start_time) } ||
+        Event.where("timestamp >= ?", @start_time).count
     end
 
     @total_rules = Rails.cache.fetch("analytics/total_rules", expires_in: 5.minutes) do
@@ -40,31 +41,33 @@ class AnalyticsController < ApplicationController
       NetworkRange.count
     end
 
-    # Event breakdown by action - cached
+    # Event breakdown by action - cached (uses DuckDB if available)
     @event_breakdown = Rails.cache.fetch("#{cache_key_base}/event_breakdown", expires_in: cache_ttl) do
-      Event.where("timestamp >= ?", @start_time)
-        .group(:waf_action)
-        .count
-      # Keys are already strings ("allow", "deny", etc.) from the enum
+      with_duckdb_fallback { EventDdb.breakdown_by_action(@start_time) } ||
+        Event.where("timestamp >= ?", @start_time)
+          .group(:waf_action)
+          .count
     end
 
-    # Top countries by event count - cached (now uses denormalized country column)
+    # Top countries by event count - cached (uses DuckDB if available)
     @top_countries = Rails.cache.fetch("#{cache_key_base}/top_countries", expires_in: cache_ttl) do
-      Event.where("timestamp >= ? AND country IS NOT NULL", @start_time)
-        .group(:country)
-        .count
-        .sort_by { |_, count| -count }
-        .first(10)
+      with_duckdb_fallback { EventDdb.top_countries(@start_time, 10) } ||
+        Event.where("timestamp >= ? AND country IS NOT NULL", @start_time)
+          .group(:country)
+          .count
+          .sort_by { |_, count| -count }
+          .first(10)
     end
 
-    # Top blocked IPs - cached
+    # Top blocked IPs - cached (uses DuckDB if available)
     @top_blocked_ips = Rails.cache.fetch("#{cache_key_base}/top_blocked_ips", expires_in: cache_ttl) do
-      Event.where("timestamp >= ?", @start_time)
-        .where(waf_action: 1) # deny action in enum
-        .group(:ip_address)
-        .count
-        .sort_by { |_, count| -count }
-        .first(10)
+      with_duckdb_fallback { EventDdb.top_blocked_ips(@start_time, 10) } ||
+        Event.where("timestamp >= ?", @start_time)
+          .where(waf_action: 0) # deny action in enum
+          .group(:ip_address)
+          .count
+          .sort_by { |_, count| -count }
+          .first(10)
     end
 
     # Network range intelligence breakdown - cached
@@ -92,7 +95,7 @@ class AnalyticsController < ApplicationController
       total_users: User.count,
       active_rules: Rule.enabled.count,
       disabled_rules: Rule.where(enabled: false).count,
-      recent_errors: Event.where("timestamp >= ? AND waf_action = ?", @start_time, 1).count # 1 = deny
+      recent_errors: Event.where("timestamp >= ? AND waf_action = ?", @start_time, 0).count # 0 = deny
     }
   end
 
@@ -117,38 +120,90 @@ class AnalyticsController < ApplicationController
     @time_period = params[:period]&.to_sym || :day
     @start_time = calculate_start_time(@time_period)
 
-    # Top networks by request volume (using denormalized network_range_id)
-    # Use a subquery approach to avoid PostgreSQL GROUP BY issues with network_ranges.*
-    event_stats = Event.where("timestamp >= ?", @start_time)
-                       .where.not(network_range_id: nil)
-                       .group(:network_range_id)
-                       .select("network_range_id, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips")
+    # Top networks by request volume - use DuckDB if available
+    network_stats = with_duckdb_fallback { EventDdb.top_networks(@start_time, 50) }
 
-    # Join the stats back to NetworkRange to get full network details
-    @top_networks = NetworkRange.joins("INNER JOIN (#{event_stats.to_sql}) stats ON stats.network_range_id = network_ranges.id")
-                                .select("network_ranges.*, stats.event_count, stats.unique_ips")
-                                .order("stats.event_count DESC")
-                                .limit(50)
+    if network_stats
+      # DuckDB path: array format [network_range_id, event_count, unique_ips]
+      network_ids = network_stats.map { |row| row[0] }
+      stats_by_id = network_stats.to_h { |row| [row[0], { event_count: row[1], unique_ips: row[2] }] }
+
+      @top_networks = NetworkRange.where(id: network_ids)
+        .to_a
+        .map do |network|
+          stats = stats_by_id[network.id]
+          network.define_singleton_method(:event_count) { stats[:event_count] }
+          network.define_singleton_method(:unique_ips) { stats[:unique_ips] }
+
+          # Add inherited intelligence support
+          intelligence = network.inherited_intelligence
+          if intelligence[:inherited]
+            network.define_singleton_method(:display_company) { intelligence[:company] }
+            network.define_singleton_method(:display_country) { intelligence[:country] }
+            network.define_singleton_method(:inherited_from) { intelligence[:parent_cidr] }
+            network.define_singleton_method(:has_inherited_data?) { true }
+          else
+            network.define_singleton_method(:display_company) { network.company }
+            network.define_singleton_method(:display_country) { network.country }
+            network.define_singleton_method(:inherited_from) { nil }
+            network.define_singleton_method(:has_inherited_data?) { false }
+          end
+
+          network
+        end
+        .sort_by { |n| -n.event_count }
+    else
+      # PostgreSQL fallback
+      event_stats = Event.where("timestamp >= ?", @start_time)
+                         .where.not(network_range_id: nil)
+                         .group(:network_range_id)
+                         .select("network_range_id, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips")
+
+      @top_networks = NetworkRange.joins("INNER JOIN (#{event_stats.to_sql}) stats ON stats.network_range_id = network_ranges.id")
+                                  .select("network_ranges.*, stats.event_count, stats.unique_ips")
+                                  .order("stats.event_count DESC")
+                                  .limit(50)
+
+      # Add inherited intelligence support for PostgreSQL fallback
+      @top_networks = @top_networks.to_a.map do |network|
+        intelligence = network.inherited_intelligence
+        if intelligence[:inherited]
+          network.define_singleton_method(:display_company) { intelligence[:company] }
+          network.define_singleton_method(:display_country) { intelligence[:country] }
+          network.define_singleton_method(:inherited_from) { intelligence[:parent_cidr] }
+          network.define_singleton_method(:has_inherited_data?) { true }
+        else
+          network.define_singleton_method(:display_company) { network.company }
+          network.define_singleton_method(:display_country) { network.country }
+          network.define_singleton_method(:inherited_from) { nil }
+          network.define_singleton_method(:has_inherited_data?) { false }
+        end
+        network
+      end
+    end
 
     # Network type breakdown with traffic stats
     @network_breakdown = calculate_network_type_stats(@start_time)
 
-    # Company breakdown for top traffic sources (using denormalized company column)
-    @top_companies = Event.where("timestamp >= ? AND company IS NOT NULL", @start_time)
+    # Company breakdown for top traffic sources - use DuckDB if available
+    @top_companies = with_duckdb_fallback { EventDdb.top_companies(@start_time, 20) } ||
+                     Event.where("timestamp >= ? AND company IS NOT NULL", @start_time)
                            .group(:company)
                            .select("company, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips, COUNT(DISTINCT network_range_id) as network_count")
                            .order("event_count DESC")
                            .limit(20)
 
-    # ASN breakdown (using denormalized asn columns)
-    @top_asns = Event.where("timestamp >= ? AND asn IS NOT NULL", @start_time)
+    # ASN breakdown - use DuckDB if available
+    @top_asns = with_duckdb_fallback { EventDdb.top_asns(@start_time, 15) } ||
+                Event.where("timestamp >= ? AND asn IS NOT NULL", @start_time)
                      .group(:asn, :asn_org)
                      .select("asn, asn_org, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips, COUNT(DISTINCT network_range_id) as network_count")
                      .order("event_count DESC")
                      .limit(15)
 
-    # Geographic breakdown (using denormalized country column)
-    @top_countries = Event.where("timestamp >= ? AND country IS NOT NULL", @start_time)
+    # Geographic breakdown - use DuckDB if available
+    @top_countries = with_duckdb_fallback { EventDdb.top_countries_with_stats(@start_time, 15) } ||
+                     Event.where("timestamp >= ? AND country IS NOT NULL", @start_time)
                            .group(:country)
                            .select("country, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips")
                            .order("event_count DESC")
@@ -191,12 +246,15 @@ class AnalyticsController < ApplicationController
     # Historical hours are cached for full TTL, current hour cached briefly for freshness
 
     # Cache historical hours (1-23 hours ago) - these are complete and won't change
-    # No expiration - will stick around until evicted by cache store
+    # No expiration - will stick around until evicted by cache store (uses DuckDB if available)
     historical_timeline = Rails.cache.fetch("#{cache_key_base}/chart_historical") do
       historical_start = 23.hours.ago.beginning_of_hour
-      events_by_hour = Event.where("timestamp >= ? AND timestamp < ?", historical_start, Time.current.beginning_of_hour)
-                            .group("DATE_TRUNC('hour', timestamp)")
-                            .count
+      current_hour_start = Time.current.beginning_of_hour
+
+      events_by_hour = with_duckdb_fallback { EventDdb.hourly_timeline(historical_start, current_hour_start) } ||
+                       Event.where("timestamp >= ? AND timestamp < ?", historical_start, current_hour_start)
+                         .group("DATE_TRUNC('hour', timestamp)")
+                         .count
 
       (1..23).map do |hour_ago|
         hour_time = hour_ago.hours.ago.beginning_of_hour
@@ -209,6 +267,7 @@ class AnalyticsController < ApplicationController
     end
 
     # Current hour (0 hours ago) - cache very briefly since it's actively accumulating
+    # ALWAYS use PostgreSQL for current hour to get real-time data (DuckDB syncs every minute)
    current_hour_data = Rails.cache.fetch("#{cache_key_base}/chart_current_hour", expires_in: 1.minute) do
       hour_time = Time.current.beginning_of_hour
       count = Event.where("timestamp >= ?", hour_time).count
@@ -290,6 +349,12 @@ class AnalyticsController < ApplicationController
   end
 
   def calculate_network_type_stats(start_time)
+    # Try DuckDB first, fallback to PostgreSQL
+    duckdb_stats = with_duckdb_fallback { EventDdb.network_type_stats(start_time) }
+
+    return duckdb_stats if duckdb_stats
+
+    # PostgreSQL fallback
     # Get all network types with their traffic statistics using denormalized columns
     network_types = [
       { type: 'datacenter', label: 'Datacenter', column: :is_datacenter },
@@ -333,6 +398,12 @@ class AnalyticsController < ApplicationController
   end
 
   def calculate_suspicious_patterns(start_time)
+    # Try DuckDB first, fallback to PostgreSQL
+    duckdb_patterns = with_duckdb_fallback { EventDdb.suspicious_patterns(start_time) }
+
+    return duckdb_patterns if duckdb_patterns
+
+    # PostgreSQL fallback
     patterns = {}
 
     # High volume networks (top 1% by request count) - using denormalized network_range_id
@@ -358,9 +429,9 @@ class AnalyticsController < ApplicationController
     high_deny_networks = Event.where("timestamp >= ? AND network_range_id IS NOT NULL", start_time)
       .group(:network_range_id)
       .select("network_range_id,
-              COUNT(CASE WHEN waf_action = 1 THEN 1 END) as denied_count,
+              COUNT(CASE WHEN waf_action = 0 THEN 1 END) as denied_count,
               COUNT(*) as total_count")
-      .having("COUNT(CASE WHEN waf_action = 1 THEN 1 END)::float / COUNT(*) > 0.5")
+      .having("COUNT(CASE WHEN waf_action = 0 THEN 1 END)::float / COUNT(*) > 0.5")
       .having("COUNT(*) >= 10") # minimum threshold
 
     patterns[:high_deny_rate] = {
@@ -392,12 +463,14 @@ class AnalyticsController < ApplicationController
         {
           id: network.id,
           cidr: network.cidr,
-          company: network.company,
+          company: network.display_company,
           asn: network.asn,
-          country: network.country,
+          country: network.display_country,
           network_type: network.network_type,
           event_count: network.event_count,
-          unique_ips: network.unique_ips
+          unique_ips: network.unique_ips,
+          has_inherited_data: network.has_inherited_data?,
+          inherited_from: network.inherited_from
         }
       },
       network_breakdown: @network_breakdown,
@@ -449,4 +522,27 @@ class AnalyticsController < ApplicationController
       }
     end
   end
+
+  # Helper method to try DuckDB first, fall back to PostgreSQL
+  def with_duckdb_fallback(&block)
+    result = yield
+    result.nil? ? nil : result # Return result or nil to trigger fallback
+  rescue StandardError => e
+    Rails.logger.warn "[Analytics] DuckDB query failed, falling back to PostgreSQL: #{e.message}"
+    nil # Return nil to trigger fallback
+  end
+
+  # Check if DuckDB has recent data (within last 2 minutes)
+  # Returns true if DuckDB is up-to-date, false if potentially stale
+  def duckdb_is_fresh?
+    newest = AnalyticsDuckdbService.instance.newest_event_timestamp
+    return false if newest.nil?
+
+    # Consider fresh if newest event is within 2 minutes
+    # (sync job runs every 1 minute, so 2 minutes allows for some lag)
+    newest >= 2.minutes.ago
+  rescue StandardError => e
+    Rails.logger.warn "[Analytics] Error checking DuckDB freshness: #{e.message}"
+    false
+  end
 end
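The pattern this controller now repeats is worth reading once in isolation. Because with_duckdb_fallback (defined at the bottom of the hunk above) turns both raised errors and explicit nils into nil, callers chain the PostgreSQL query with ||; any non-nil DuckDB result, including 0 or an empty hash, which are truthy in Ruby, short-circuits the fallback. A minimal sketch using only the helper and EventDdb API from this diff:

    total_events =
      with_duckdb_fallback { EventDdb.count_since(24.hours.ago) } ||
      Event.where("timestamp >= ?", 24.hours.ago).count

Incidentally, `result.nil? ? nil : result` inside the helper is equivalent to plain `result`; the ternary only documents the intent.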
app/controllers/bot_network_ranges_controller.rb (new file, 126 lines)
@@ -0,0 +1,126 @@
+# frozen_string_literal: true
+
+class BotNetworkRangesController < ApplicationController
+  before_action :authenticate_user!
+  before_action :require_admin
+
+  def index
+    @bot_sources = BotNetworkRangeImporter::BOT_SOURCES
+    @recent_imports = DataImport.where(import_type: 'bot_network_ranges').order(created_at: :desc).limit(10)
+    @bot_network_ranges = NetworkRange.where("source LIKE 'bot_import_%'").order(created_at: :desc).limit(50)
+  end
+
+  def import
+    source_key = params[:source]
+    options = import_options
+
+    if source_key.present?
+      # Perform import synchronously for immediate feedback
+      begin
+        result = BotNetworkRangeImporter.import_from_source(source_key, options)
+
+        # Create a data import record
+        DataImport.create!(
+          import_type: 'bot_network_ranges',
+          source: source_key.to_s,
+          status: 'completed',
+          records_processed: result[:imported],
+          notes: "Imported from #{result[:source]}: #{result[:note] || 'Success'}"
+        )
+
+        flash[:notice] = "Successfully imported #{result[:imported]} ranges from #{result[:source]}"
+      rescue => e
+        flash[:alert] = "Failed to import from #{source_key}: #{e.message}"
+      end
+    else
+      flash[:alert] = "Please select a source to import from"
+    end
+
+    redirect_to bot_network_ranges_path
+  end
+
+  def import_async
+    source_key = params[:source]
+    options = import_options
+
+    if source_key.present?
+      # Create a data import record for tracking
+      data_import = DataImport.create!(
+        import_type: 'bot_network_ranges',
+        source: source_key.to_s,
+        status: 'pending',
+        records_processed: 0,
+        notes: "Import job queued for #{source_key}"
+      )
+
+      # Queue the background job
+      ImportBotNetworkRangesJob.perform_later(source_key, options.merge(data_import_id: data_import.id))
+
+      flash[:notice] = "Import job queued for #{source_key}. You'll be notified when it's complete."
+    else
+      flash[:alert] = "Please select a source to import from"
+    end
+
+    redirect_to bot_network_ranges_path
+  end
+
+  def import_all
+    options = import_options
+
+    # Create a data import record for batch import
+    data_import = DataImport.create!(
+      import_type: 'bot_network_ranges',
+      source: 'all_sources',
+      status: 'pending',
+      records_processed: 0,
+      notes: "Batch import job queued for all available sources"
+    )
+
+    # Queue the batch import job
+    ImportAllBotNetworkRangesJob.perform_later(options.merge(data_import_id: data_import.id))
+
+    flash[:notice] = "Batch import job queued for all sources. This may take several minutes."
+    redirect_to bot_network_ranges_path
+  end
+
+  def show
+    @network_ranges = NetworkRange.where("source LIKE 'bot_import_#{params[:source]}%'")
+                                  .order(created_at: :desc)
+                                  .page(params[:page])
+                                  .per(50)
+
+    @source_name = BotNetworkRangeImporter::BOT_SOURCES[params[:source].to_sym]&.dig(:name) || params[:source]
+    @import_stats = NetworkRange.where("source LIKE 'bot_import_#{params[:source]}%'")
+                                .group(:source)
+                                .count
+  end
+
+  def destroy
+    source = params[:source]
+    deleted_count = NetworkRange.where("source LIKE 'bot_import_#{source}%'").delete_all
+
+    flash[:notice] = "Deleted #{deleted_count} network ranges from #{source}"
+    redirect_to bot_network_ranges_path
+  end
+
+  private
+
+  def require_admin
+    redirect_to root_path, alert: 'Admin access required' unless current_user&.admin?
+  end
+
+  def import_options
+    options = {}
+
+    # AWS-specific options
+    if params[:aws_services].present?
+      options[:aws_services] = params[:aws_services].split(',').map(&:strip)
+    end
+
+    # Batch size control
+    options[:batch_size] = params[:batch_size].to_i if params[:batch_size].present?
+    options[:batch_size] = 1000 if options[:batch_size].zero?
+
+    options
+  end
+end
app/controllers/network_ranges_controller.rb
@@ -46,8 +46,10 @@ class NetworkRangesController < ApplicationController
     authorize @network_range
 
     if @network_range.persisted?
-      # Real network - use direct IP containment for consistency with stats
-      events_scope = Event.where("ip_address <<= ?", @network_range.cidr).recent
+      # Real network - use indexed network_range_id for much better performance
+      # Include child network ranges to capture all traffic within this network block
+      network_ids = [@network_range.id] + @network_range.child_ranges.pluck(:id)
+      events_scope = Event.where(network_range_id: network_ids).recent
     else
       # Virtual network - find events by IP range containment
       events_scope = Event.where("ip_address <<= ?::inet", @network_range.to_s).recent
@@ -58,22 +60,24 @@ class NetworkRangesController < ApplicationController
 
     @child_ranges = @network_range.child_ranges.limit(20)
     @parent_ranges = @network_range.parent_ranges.limit(10)
-    @associated_rules = @network_range.persisted? ? @network_range.rules.includes(:user).order(created_at: :desc) : []
+    @associated_rules = @network_range.persisted? ? @network_range.rules.includes(:user, :network_range, :waf_policy).order(created_at: :desc) : []
 
     # Load rules from supernets and subnets
-    @supernet_rules = @network_range.persisted? ? @network_range.supernet_rules.includes(:network_range, :user).limit(10) : []
-    @subnet_rules = @network_range.persisted? ? @network_range.child_rules.includes(:network_range, :user).limit(20) : []
+    @supernet_rules = @network_range.persisted? ? @network_range.supernet_rules.includes(:network_range, :user, :waf_policy).limit(10) : []
+    @subnet_rules = @network_range.persisted? ? @network_range.child_rules.includes(:network_range, :user, :waf_policy).limit(20) : []
 
     # Traffic analytics (if we have events)
     @traffic_stats = calculate_traffic_stats(@network_range)
 
-    # Check if we have IPAPI data (or if parent has it)
+    # Check if we have IPAPI data (or if parent has it) - cache expensive parent lookup
     @has_ipapi_data = @network_range.has_network_data_from?(:ipapi)
     @parent_with_ipapi = nil
 
     unless @has_ipapi_data
-      # Check if parent has IPAPI data
-      parent = @network_range.parent_with_intelligence
+      # Cache expensive parent intelligence lookup
+      parent = Rails.cache.fetch("network_parent_intel:#{@network_range.cache_key}", expires_in: 1.hour) do
+        @network_range.parent_with_intelligence
+      end
       if parent&.has_network_data_from?(:ipapi)
         @parent_with_ipapi = parent
         @has_ipapi_data = true
@@ -194,6 +198,15 @@ class NetworkRangesController < ApplicationController
 
   private
 
+  # Helper method to try DuckDB first, fall back to PostgreSQL
+  def with_duckdb_fallback(&block)
+    result = yield
+    result.nil? ? nil : result # Return result or nil to trigger fallback
+  rescue StandardError => e
+    Rails.logger.warn "[NetworkRanges] DuckDB query failed, falling back to PostgreSQL: #{e.message}"
+    nil # Return nil to trigger fallback
+  end
+
   def set_network_range
     # Handle CIDR slugs (e.g., "40.77.167.100_32" -> "40.77.167.100/32")
     cidr = params[:id].gsub('_', '/')
@@ -248,27 +261,37 @@ class NetworkRangesController < ApplicationController
     # Use indexed network_range_id for much better performance instead of expensive CIDR operator
     # Include child network ranges to capture all traffic within this network block
     network_ids = [network_range.id] + network_range.child_ranges.pluck(:id)
-    base_query = Event.where(network_range_id: network_ids)
 
-    # Use separate queries: one for grouping (without ordering), one for recent activity (with ordering)
-    events_for_grouping = base_query.limit(1000)
-    events_for_activity = base_query.recent.limit(20)
-
-    # Calculate counts properly - use consistent base_query for all counts
-    total_requests = base_query.count
-    unique_ips = base_query.except(:order).distinct.count(:ip_address)
-    blocked_requests = base_query.blocked.count
-    allowed_requests = base_query.allowed.count
-
-    {
-      total_requests: total_requests,
-      unique_ips: unique_ips,
-      blocked_requests: blocked_requests,
-      allowed_requests: allowed_requests,
-      top_paths: events_for_grouping.group(:request_path).count.sort_by { |_, count| -count }.first(10),
-      top_user_agents: events_for_grouping.group(:user_agent).count.sort_by { |_, count| -count }.first(5),
-      recent_activity: events_for_activity
-    }
+    # Try DuckDB first for stats (much faster)
+    duckdb_stats = with_duckdb_fallback { EventDdb.network_traffic_stats(network_ids) }
+    duckdb_top_paths = with_duckdb_fallback { EventDdb.network_top_paths(network_ids, 10) }
+    duckdb_top_agents = with_duckdb_fallback { EventDdb.network_top_user_agents(network_ids, 5) }
+
+    if duckdb_stats
+      # DuckDB success - use fast aggregated stats
+      stats = duckdb_stats.merge(
+        top_paths: duckdb_top_paths&.to_h || {},
+        top_user_agents: duckdb_top_agents&.to_h || {},
+        recent_activity: Event.where(network_range_id: network_ids).recent.limit(20)
+      )
+    else
+      # PostgreSQL fallback
+      base_query = Event.where(network_range_id: network_ids)
+      events_for_grouping = base_query.limit(1000)
+      events_for_activity = base_query.recent.limit(20)
+
+      stats = {
+        total_requests: base_query.count,
+        unique_ips: base_query.except(:order).distinct.count(:ip_address),
+        blocked_requests: base_query.blocked.count,
+        allowed_requests: base_query.allowed.count,
+        top_paths: events_for_grouping.group(:request_path).count.sort_by { |_, count| -count }.first(10).to_h,
+        top_user_agents: events_for_grouping.group(:user_agent).count.sort_by { |_, count| -count }.first(5).to_h,
+        recent_activity: events_for_activity
+      }
+    end
+
+    stats
   else
     # No events - return empty stats
     {
@@ -296,8 +319,8 @@ class NetworkRangesController < ApplicationController
         unique_ips: base_query.except(:order).distinct.count(:ip_address),
         blocked_requests: base_query.blocked.count,
         allowed_requests: base_query.allowed.count,
-        top_paths: events_for_grouping.group(:request_path).count.sort_by { |_, count| -count }.first(10),
-        top_user_agents: events_for_grouping.group(:user_agent).count.sort_by { |_, count| -count }.first(5),
+        top_paths: events_for_grouping.group(:request_path).count.sort_by { |_, count| -count }.first(10).to_h,
+        top_user_agents: events_for_grouping.group(:user_agent).count.sort_by { |_, count| -count }.first(5).to_h,
         recent_activity: events_for_activity
       }
     else
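Shape note on the helpers used above: the EventDdb methods shown later in this diff (top_countries, top_blocked_ips) return arrays of [key, count] tuples rather than hashes, and network_top_paths / network_top_user_agents presumably follow the same convention, which is why they are folded with `&.to_h || {}`. Illustrative values only:

    EventDdb.network_top_paths(network_ids, 10)
    #=> [["/login", 421], ["/xmlrpc.php", 87], ...]   (hypothetical data)

    duckdb_top_paths&.to_h
    #=> { "/login" => 421, "/xmlrpc.php" => 87, ... }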
app/controllers/rules_controller.rb
@@ -46,12 +46,9 @@ class RulesController < ApplicationController
     process_quick_create_parameters
 
     # Handle network range creation if CIDR is provided
-    if params[:cidr].present? && @rule.network_rule?
-      network_range = NetworkRange.find_or_create_by(cidr: params[:cidr]) do |range|
-        range.user = Current.user
-        range.source = 'manual'
-        range.creation_reason = "Created for rule ##{@rule.id}"
-      end
+    cidr_param = params[:new_cidr].presence || params[:cidr].presence
+    if cidr_param.present? && @rule.network_rule?
+      network_range = NetworkRange.find_or_create_by_cidr(cidr_param, user: Current.user, source: 'manual')
       @rule.network_range = network_range
     end
 
@@ -132,7 +129,9 @@ class RulesController < ApplicationController
           :expires_at,
           :enabled,
           :source,
-          :network_range_id
+          :network_range_id,
+          :header_name,
+          :header_value
         ]
 
         # Only include conditions for non-network rules
@@ -250,15 +249,24 @@ def process_quick_create_parameters
     })
   end
 
-  # Parse metadata if it's a string that looks like JSON
-  if @rule.metadata.is_a?(String) && @rule.metadata.starts_with?('{')
+  # Parse metadata textarea first if it's JSON
+  if @rule.metadata.is_a?(String) && @rule.metadata.present? && @rule.metadata.starts_with?('{')
     begin
       @rule.metadata = JSON.parse(@rule.metadata)
     rescue JSON::ParserError
-      # Keep as string if not valid JSON
+      # Keep as string if not valid JSON - will be caught by validation
     end
   end
+
+  # Ensure metadata is a hash
+  @rule.metadata = {} unless @rule.metadata.is_a?(Hash)
+
+  # Handle add_header fields - use provided params or existing metadata values
+  if @rule.add_header_action? && (params[:header_name].present? || params[:header_value].present?)
+    @rule.metadata['header_name'] = params[:header_name].presence || @rule.metadata['header_name'] || 'X-Bot-Agent'
+    @rule.metadata['header_value'] = params[:header_value].presence || @rule.metadata['header_value'] || 'Unknown'
+  end
 
   # Handle expires_at parsing for text input
   if params.dig(:rule, :expires_at).present?
     expires_at_str = params[:rule][:expires_at].strip
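Taken together, the metadata handling above normalizes every input to a Hash. A short walkthrough of the three cases, with illustrative values:

    @rule.metadata = '{"header_name": "X-Bot-Agent"}'
    # valid JSON string -> parsed into { "header_name" => "X-Bot-Agent" }

    @rule.metadata = '{not json'
    # starts with "{" but unparseable -> the rescue keeps the string,
    # then the is_a?(Hash) guard resets it to {}

    @rule.metadata = nil
    # not a String starting with "{" -> the guard resets it to {}

The add_header defaults ('X-Bot-Agent' / 'Unknown') only apply when the rule's action is add_header and at least one header param was submitted.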
app/javascript/controllers/rule_form_controller.js (new file, 36 lines)
@@ -0,0 +1,36 @@
+import { Controller } from "@hotwired/stimulus"
+
+export default class RuleFormController extends Controller {
+  static targets = ["actionSelect", "addHeaderSection", "expirationCheckbox", "expirationField"]
+
+  connect() {
+    this.updateActionSections()
+  }
+
+  updateActionSections() {
+    const selectedAction = this.actionSelectTarget.value
+
+    // Hide all action-specific sections
+    this.addHeaderSectionTarget.classList.add('hidden')
+
+    // Show relevant section based on action
+    switch(selectedAction) {
+      case 'add_header':
+        this.addHeaderSectionTarget.classList.remove('hidden')
+        break
+    }
+  }
+
+  toggleExpiration() {
+    if (this.expirationCheckboxTarget.checked) {
+      this.expirationFieldTarget.classList.remove('hidden')
+    } else {
+      this.expirationFieldTarget.classList.add('hidden')
+      // Clear the datetime field when unchecked
+      const datetimeInput = this.expirationFieldTarget.querySelector('input[type="datetime-local"]')
+      if (datetimeInput) {
+        datetimeInput.value = ''
+      }
+    }
+  }
+}
app/jobs/cleanup_old_events_job.rb (new file, 52 lines)
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+# CleanupOldEventsJob - Removes events older than the configured retention period
+#
+# This job runs periodically (hourly) to clean up old events based on the
+# event_retention_days setting. This helps keep the database size manageable
+# and improves query performance.
+#
+# The retention period is configurable via the 'event_retention_days' setting
+# (default: 90 days). This allows administrators to balance between historical
+# data retention and database performance.
+#
+# Schedule: Every hour (configured in config/recurring.yml)
+class CleanupOldEventsJob < ApplicationJob
+  queue_as :background
+
+  def perform
+    retention_days = Setting.event_retention_days
+
+    # Don't delete if retention is set to 0 or negative (disabled)
+    if retention_days <= 0
+      Rails.logger.info "CleanupOldEventsJob: Event retention disabled (retention_days: #{retention_days})"
+      return 0
+    end
+
+    cutoff_date = retention_days.days.ago
+
+    # Count events to be deleted
+    old_events = Event.where('timestamp < ?', cutoff_date)
+    count = old_events.count
+
+    if count.zero?
+      Rails.logger.info "CleanupOldEventsJob: No events older than #{retention_days} days found"
+      return 0
+    end
+
+    Rails.logger.info "CleanupOldEventsJob: Deleting #{count} events older than #{retention_days} days (before #{cutoff_date})"
+
+    # Delete in batches to avoid long-running transactions
+    deleted_count = 0
+    batch_size = 10_000
+
+    old_events.in_batches(of: batch_size) do |batch|
+      batch_count = batch.delete_all
+      deleted_count += batch_count
+      Rails.logger.info "CleanupOldEventsJob: Deleted batch of #{batch_count} events (total: #{deleted_count}/#{count})"
+    end
+
+    Rails.logger.info "CleanupOldEventsJob: Successfully deleted #{deleted_count} events"
+    deleted_count
+  end
+end
app/jobs/fetch_ipapi_data_job.rb
@@ -15,31 +15,14 @@ class FetchIpapiDataJob < ApplicationJob
     ipapi_data = Ipapi.lookup(sample_ip)
 
     if ipapi_data.present? && !ipapi_data.key?('error')
-      # Check if IPAPI returned a different route than our tracking network
-      ipapi_route = ipapi_data.dig('asn', 'route')
-      target_network = tracking_network
-
-      if ipapi_route.present? && ipapi_route != tracking_network.cidr
-        # IPAPI returned a different CIDR - find or create that network range
-        Rails.logger.info "IPAPI returned different route: #{ipapi_route} (requested: #{tracking_network.cidr})"
-
-        target_network = NetworkRange.find_or_create_by(network: ipapi_route) do |nr|
-          nr.source = 'api_imported'
-          nr.creation_reason = "Created from IPAPI lookup for #{tracking_network.cidr}"
-        end
-
-        Rails.logger.info "Storing IPAPI data on correct network: #{target_network.cidr}"
-      end
-
-      # Store data on the target network (wherever IPAPI said it belongs)
-      target_network.set_network_data(:ipapi, ipapi_data)
-      target_network.last_api_fetch = Time.current
-      target_network.save!
-
-      # Mark the tracking network as having been queried, with the CIDR that was returned
-      tracking_network.mark_ipapi_queried!(target_network.cidr)
-
-      Rails.logger.info "Successfully fetched IPAPI data for #{tracking_network.cidr} (stored on #{target_network.cidr})"
-
+      # Process IPAPI data and create network ranges
+      result = Ipapi.process_ipapi_data(ipapi_data, tracking_network)
+
+      # Mark the tracking network as having been queried
+      # Use the broadest CIDR returned for deduplication
+      tracking_network.mark_ipapi_queried!(result[:broadest_cidr])
+
+      Rails.logger.info "Successfully fetched IPAPI data for #{tracking_network.cidr} (created #{result[:networks].length} networks)"
+
       # Broadcast to the tracking network
       broadcast_ipapi_update(tracking_network, ipapi_data)
app/jobs/import_all_bot_network_ranges_job.rb (new file, 26 lines)
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+# ImportAllBotNetworkRangesJob - Background job for importing from all bot sources
+class ImportAllBotNetworkRangesJob < ApplicationJob
+  queue_as :default
+
+  def perform(options = {})
+    Rails.logger.info "Starting batch import of all bot network ranges"
+
+    results = BotNetworkRangeImporter.import_all_sources(options)
+
+    # Send completion summary
+    Rails.logger.info "Batch import completed. Summary: #{results}"
+
+    # Broadcast summary to clients
+    ActionCable.server.broadcast(
+      "bot_imports",
+      {
+        type: 'batch_summary',
+        status: 'completed',
+        results: results,
+        message: "Batch import completed for all sources"
+      }
+    )
+  end
+end
app/jobs/import_bot_network_ranges_job.rb (new file, 47 lines)
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+# ImportBotNetworkRangesJob - Background job for importing bot network ranges
+#
+# Imports network ranges from official bot provider sources.
+# Runs asynchronously to avoid blocking the web interface.
+class ImportBotNetworkRangesJob < ApplicationJob
+  queue_as :default
+
+  def perform(source_key, options = {})
+    Rails.logger.info "Starting bot network range import for source: #{source_key}"
+
+    begin
+      result = BotNetworkRangeImporter.import_from_source(source_key, options)
+
+      # Send notification or log completion
+      Rails.logger.info "Successfully imported #{result[:imported]} ranges from #{result[:source]}"
+
+      # Optionally broadcast via Turbo Streams for real-time updates
+      ActionCable.server.broadcast(
+        "bot_imports",
+        {
+          source: source_key,
+          status: 'completed',
+          imported: result[:imported],
+          message: "Successfully imported #{result[:imported]} ranges from #{result[:source]}"
+        }
+      )
+
+    rescue => e
+      Rails.logger.error "Bot network range import failed for #{source_key}: #{e.message}"
+
+      # Broadcast error notification
+      ActionCable.server.broadcast(
+        "bot_imports",
+        {
+          source: source_key,
+          status: 'error',
+          error: e.message,
+          message: "Failed to import from #{source_key}: #{e.message}"
+        }
+      )
+
+      raise e
+    end
+  end
+end
app/jobs/process_waf_event_job.rb
@@ -53,16 +53,15 @@ class ProcessWafEventJob < ApplicationJob
     # Queue IPAPI enrichment based on /24 tracking
     # The tracking network is the /24 that stores ipapi_queried_at
     if NetworkRange.should_fetch_ipapi_for_ip?(event.ip_address)
-      # Use tracking network for fetch status to avoid race conditions
-      if tracking_network.is_fetching_api_data?(:ipapi)
-        Rails.logger.info "Skipping IPAPI fetch for #{tracking_network.cidr} - already being fetched"
-      else
-        tracking_network.mark_as_fetching_api_data!(:ipapi)
+      # Atomically mark as fetching - this prevents duplicate jobs via database lock
+      if tracking_network.mark_as_fetching_api_data!(:ipapi)
        Rails.logger.info "Queueing IPAPI fetch for IP #{event.ip_address} (tracking network: #{tracking_network.cidr})"
         FetchIpapiDataJob.perform_later(network_range_id: tracking_network.id)
+      else
+        Rails.logger.info "Skipping IPAPI fetch for #{tracking_network.cidr} - another job already started"
       end
     else
-      Rails.logger.debug "Skipping IPAPI fetch for IP #{event.ip_address} - already queried recently"
+      Rails.logger.debug "Skipping IPAPI fetch for IP #{event.ip_address} - already queried or being fetched"
     end
 
     # Evaluate WAF policies inline if needed (lazy evaluation)
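The point of the change above is to collapse a check-then-set pair into one call whose return value decides the branch, so two jobs handling events from the same /24 cannot both enqueue a FetchIpapiDataJob. NetworkRange#mark_as_fetching_api_data! itself is not shown in this diff; one common way to make such a guard atomic is a conditional UPDATE, sketched here purely as an assumption about its shape:

    # Hypothetical sketch - the real implementation lives in NetworkRange and
    # may differ. update_all returns the number of rows changed, so at most
    # one concurrent caller sees 1 (truthy); everyone else sees 0 and gets false.
    def mark_as_fetching_api_data!(source)
      column = "#{source}_fetching_at"   # assumed column name
      self.class.where(id: id).where(column => nil)
                .update_all(column => Time.current) == 1
    end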
app/jobs/sync_events_to_duckdb_job.rb (new file, 89 lines)
@@ -0,0 +1,89 @@
+# frozen_string_literal: true
+
+# Background job to sync events from PostgreSQL to DuckDB
+# Runs every 5 minutes to keep analytics database up-to-date
+# Uses watermark tracking to only sync new events
+class SyncEventsToDuckdbJob < ApplicationJob
+  queue_as :default
+
+  # Key for storing last sync timestamp in Rails cache
+  WATERMARK_CACHE_KEY = "duckdb_last_sync_time"
+  WATERMARK_TTL = 1.week
+
+  # Overlap window to catch late-arriving events
+  SYNC_OVERLAP = 1.minute
+
+  def perform
+    service = AnalyticsDuckdbService.instance
+
+    # Determine where to start syncing
+    from_timestamp = determine_sync_start_time(service)
+
+    Rails.logger.info "[DuckDB Sync] Starting sync from #{from_timestamp}"
+
+    # Sync new events using PostgreSQL cursor + DuckDB Appender
+    # (setup_schema is called internally within sync_new_events)
+    count = service.sync_new_events(from_timestamp)
+
+    # Update watermark if we synced any events
+    if count > 0
+      update_last_sync_time
+      Rails.logger.info "[DuckDB Sync] Successfully synced #{count} events"
+    else
+      Rails.logger.info "[DuckDB Sync] No new events to sync"
+    end
+  rescue StandardError => e
+    Rails.logger.error "[DuckDB Sync] Job failed: #{e.message}"
+    Rails.logger.error e.backtrace.join("\n")
+    raise # Re-raise to mark job as failed in Solid Queue
+  end
+
+  private
+
+  # Determine timestamp to start syncing from
+  # Strategy:
+  # 1. First run (DuckDB empty): sync from oldest PostgreSQL event
+  # 2. Subsequent runs: sync from last watermark with overlap
+  def determine_sync_start_time(service)
+    oldest_duckdb = service.oldest_event_timestamp
+
+    if oldest_duckdb.nil?
+      # DuckDB is empty - this is the first sync
+      # Start from oldest PostgreSQL event (or reasonable cutoff)
+      oldest_pg = Event.minimum(:timestamp)
+
+      if oldest_pg.nil?
+        # No events in PostgreSQL at all
+        Rails.logger.warn "[DuckDB Sync] No events found in PostgreSQL"
+        1.day.ago # Default to recent window
+      else
+        Rails.logger.info "[DuckDB Sync] First sync - starting from oldest event: #{oldest_pg}"
+        oldest_pg
+      end
+    else
+      # DuckDB has data - sync from last watermark with overlap
+      last_sync = Rails.cache.read(WATERMARK_CACHE_KEY)
+
+      if last_sync.nil?
+        # Watermark not in cache (maybe cache expired or restarted)
+        # Fall back to newest event in DuckDB
+        newest_duckdb = service.newest_event_timestamp
+        start_time = newest_duckdb ? newest_duckdb - SYNC_OVERLAP : oldest_duckdb
+        Rails.logger.info "[DuckDB Sync] Watermark not found, using newest DuckDB event: #{start_time}"
+        start_time
+      else
+        # Normal case: use watermark with overlap to catch late arrivals
+        start_time = last_sync - SYNC_OVERLAP
+        Rails.logger.debug "[DuckDB Sync] Using watermark: #{last_sync} (with #{SYNC_OVERLAP}s overlap)"
+        start_time
+      end
+    end
+  end
+
+  # Update last sync watermark in cache
+  def update_last_sync_time
+    now = Time.current
+    Rails.cache.write(WATERMARK_CACHE_KEY, now, expires_in: WATERMARK_TTL)
+    Rails.logger.debug "[DuckDB Sync] Updated watermark to #{now}"
+  end
+end
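A condensed walk-through of determine_sync_start_time, since its three branches carry the correctness argument (timestamps illustrative; note the class comment says the job runs every 5 minutes, while the analytics controller comments above assume a roughly 1-minute cadence):

    service = AnalyticsDuckdbService.instance

    # 1. First run: DuckDB is empty, PostgreSQL has events
    #    service.oldest_event_timestamp -> nil
    #    Event.minimum(:timestamp)      -> e.g. 2025-03-01 00:00
    #    => full backfill starting at 2025-03-01 00:00
    #
    # 2. Steady state: watermark present in Rails.cache
    #    => start at watermark - SYNC_OVERLAP, so rows that committed just
    #       after the previous run began are re-read rather than lost
    #
    # 3. Cache lost (restart or eviction) but DuckDB has rows
    #    => start at service.newest_event_timestamp - SYNC_OVERLAP

Because of the overlap, sync_new_events must tolerate re-seeing the same rows (for example by deduplicating the overlap window); that logic lives in AnalyticsDuckdbService, which is not part of this diff.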
app/models/event.rb
@@ -10,6 +10,17 @@ class Event < ApplicationRecord
 
   # Enums for fixed value sets
   # Canonical WAF action order - aligned with Rule and Agent models
+  #
+  # IMPORTANT: These values were swapped to match baffle-agent convention:
+  # - deny: 0 (blocked traffic)
+  # - allow: 1 (allowed traffic)
+  #
+  # When using raw integer values in queries:
+  # - waf_action = 0 -> denied/blocked requests
+  # - waf_action = 1 -> allowed requests
+  # - waf_action = 2 -> redirect requests
+  # - waf_action = 3 -> challenge requests
+  # - waf_action = 4 -> log-only requests
   enum :waf_action, {
     deny: 0, # deny/block
     allow: 1, # allow/pass
@@ -341,11 +352,11 @@ class Event < ApplicationRecord
   end
 
   def blocked?
-    waf_action.in?(['block', 'deny'])
+    waf_action == 'deny' # deny = 0
   end
 
   def allowed?
-    waf_action.in?(['allow', 'pass'])
+    waf_action == 'allow' # allow = 1
   end
 
   def logged?
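Several raw-SQL paths in this diff hard-code these integers, so the mapping is worth sanity-checking through the Rails enum API (deny and allow appear in this hunk; values 2-4 follow the comment block above):

    Event.waf_actions.slice("deny", "allow")  #=> { "deny" => 0, "allow" => 1 }

    e = Event.new(waf_action: :deny)
    e.deny?     #=> true
    e.blocked?  #=> true, since blocked? now tests waf_action == 'deny'

    Event.where(waf_action: 0)  # same rows as the Event.deny enum scope

This is also why the analytics queries above switched their literals from waf_action = 1 to waf_action = 0 for denied traffic.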
app/models/event_ddb.rb (new file, 499 lines)
@@ -0,0 +1,499 @@
|
|||||||
|
# frozen_string_literal: true

require 'ostruct'

# EventDdb - DuckDB-backed analytics queries for events
# Provides an ActiveRecord-like interface for querying DuckDB events table
# Falls back to PostgreSQL Event model if DuckDB is unavailable
class EventDdb
  class << self
    # Get DuckDB service
    def service
      AnalyticsDuckdbService.instance
    end

    # Total events since timestamp
    def count_since(start_time)
      service.with_connection do |conn|
        result = conn.query("SELECT COUNT(*) as count FROM events WHERE timestamp >= ?", start_time)
        result.first&.first || 0
      end
    rescue StandardError => e
      Rails.logger.error "[EventDdb] Error in count_since: #{e.message}"
      nil # Fallback to PostgreSQL
    end

    # Event breakdown by WAF action
    def breakdown_by_action(start_time)
      service.with_connection do |conn|
        result = conn.query(<<~SQL, start_time)
          SELECT waf_action, COUNT(*) as count
          FROM events
          WHERE timestamp >= ?
          GROUP BY waf_action
        SQL

        # Convert to hash like ActiveRecord .group.count returns
        # (DuckDB rows come back as positional arrays)
        result.to_a.to_h { |row| [row[0], row[1]] }
      end
    rescue StandardError => e
      Rails.logger.error "[EventDdb] Error in breakdown_by_action: #{e.message}"
      nil
    end

    # Top countries with event counts
    def top_countries(start_time, limit = 10)
      service.with_connection do |conn|
        result = conn.query(<<~SQL, start_time, limit)
          SELECT country, COUNT(*) as count
          FROM events
          WHERE timestamp >= ? AND country IS NOT NULL
          GROUP BY country
          ORDER BY count DESC
          LIMIT ?
        SQL

        # Return array of [country, count] tuples like ActiveRecord
        result.to_a.map { |row| [row[0], row[1]] }
      end
    rescue StandardError => e
      Rails.logger.error "[EventDdb] Error in top_countries: #{e.message}"
      nil
    end

    # Top blocked IPs
    def top_blocked_ips(start_time, limit = 10)
      service.with_connection do |conn|
        result = conn.query(<<~SQL, start_time, limit)
          SELECT ip_address, COUNT(*) as count
          FROM events
          WHERE timestamp >= ? AND waf_action = 0
          GROUP BY ip_address
          ORDER BY count DESC
          LIMIT ?
        SQL

        result.to_a.map { |row| [row[0], row[1]] }
      end
    rescue StandardError => e
      Rails.logger.error "[EventDdb] Error in top_blocked_ips: #{e.message}"
      nil
    end

    # Hourly timeline aggregation
    def hourly_timeline(start_time, end_time)
      service.with_connection do |conn|
        result = conn.query(<<~SQL, start_time, end_time)
          SELECT
            DATE_TRUNC('hour', timestamp) as hour,
            COUNT(*) as count
          FROM events
          WHERE timestamp >= ? AND timestamp < ?
          GROUP BY hour
          ORDER BY hour
        SQL

        # Convert to hash with Time keys like ActiveRecord
        result.to_a.to_h { |row| [row[0], row[1]] }
      end
    rescue StandardError => e
      Rails.logger.error "[EventDdb] Error in hourly_timeline: #{e.message}"
      nil
    end

    # Top networks by traffic volume
    # Returns array of arrays: [network_range_id, event_count, unique_ips]
    def top_networks(start_time, limit = 50)
      service.with_connection do |conn|
        result = conn.query(<<~SQL, start_time, limit)
          SELECT
            network_range_id,
            COUNT(*) as event_count,
            COUNT(DISTINCT ip_address) as unique_ips
          FROM events
          WHERE timestamp >= ? AND network_range_id IS NOT NULL
          GROUP BY network_range_id
          ORDER BY event_count DESC
          LIMIT ?
        SQL

        result.to_a
      end
    rescue StandardError => e
      Rails.logger.error "[EventDdb] Error in top_networks: #{e.message}"
      nil
    end

    # Top companies
    # Returns array of OpenStruct objects with: company, event_count, unique_ips, network_count
    def top_companies(start_time, limit = 20)
      service.with_connection do |conn|
        result = conn.query(<<~SQL, start_time, limit)
          SELECT
            company,
            COUNT(*) as event_count,
            COUNT(DISTINCT ip_address) as unique_ips,
            COUNT(DISTINCT network_range_id) as network_count
          FROM events
          WHERE timestamp >= ? AND company IS NOT NULL
          GROUP BY company
          ORDER BY event_count DESC
          LIMIT ?
        SQL

        # Convert arrays to OpenStruct for attribute access
        result.to_a.map do |row|
          OpenStruct.new(
            company: row[0],
            event_count: row[1],
            unique_ips: row[2],
            network_count: row[3]
          )
        end
      end
    rescue StandardError => e
      Rails.logger.error "[EventDdb] Error in top_companies: #{e.message}"
      nil
    end

    # Top ASNs
    # Returns array of OpenStruct objects with: asn, asn_org, event_count, unique_ips, network_count
    def top_asns(start_time, limit = 15)
      service.with_connection do |conn|
        result = conn.query(<<~SQL, start_time, limit)
          SELECT
            asn,
            asn_org,
            COUNT(*) as event_count,
            COUNT(DISTINCT ip_address) as unique_ips,
            COUNT(DISTINCT network_range_id) as network_count
          FROM events
          WHERE timestamp >= ? AND asn IS NOT NULL
          GROUP BY asn, asn_org
          ORDER BY event_count DESC
          LIMIT ?
        SQL

        # Convert arrays to OpenStruct for attribute access
        result.to_a.map do |row|
          OpenStruct.new(
            asn: row[0],
            asn_org: row[1],
            event_count: row[2],
            unique_ips: row[3],
            network_count: row[4]
          )
        end
      end
    rescue StandardError => e
      Rails.logger.error "[EventDdb] Error in top_asns: #{e.message}"
      nil
    end

    # Network type breakdown (datacenter, VPN, proxy, standard)
    # Returns hash with network_type as key and hash of stats as value
    def network_type_breakdown(start_time)
      service.with_connection do |conn|
        result = conn.query(<<~SQL, start_time)
          SELECT
            CASE
              WHEN is_datacenter THEN 'datacenter'
              WHEN is_vpn THEN 'vpn'
              WHEN is_proxy THEN 'proxy'
              ELSE 'standard'
            END as network_type,
            COUNT(*) as event_count,
            COUNT(DISTINCT ip_address) as unique_ips,
            COUNT(DISTINCT network_range_id) as network_count
          FROM events
          WHERE timestamp >= ?
          GROUP BY network_type
        SQL

        # Convert arrays to hash: network_type => { event_count, unique_ips, network_count }
        result.to_a.to_h do |row|
          [
            row[0], # network_type
            {
              "event_count" => row[1],
              "unique_ips" => row[2],
              "network_count" => row[3]
            }
          ]
        end
      end
    rescue StandardError => e
      Rails.logger.error "[EventDdb] Error in network_type_breakdown: #{e.message}"
      nil
    end

    # Top countries with detailed stats (event count and unique IPs)
    # Returns array of OpenStruct objects with: country, event_count, unique_ips
    def top_countries_with_stats(start_time, limit = 15)
      service.with_connection do |conn|
        result = conn.query(<<~SQL, start_time, limit)
          SELECT
            country,
            COUNT(*) as event_count,
            COUNT(DISTINCT ip_address) as unique_ips
          FROM events
          WHERE timestamp >= ? AND country IS NOT NULL
          GROUP BY country
          ORDER BY event_count DESC
          LIMIT ?
        SQL

        # Convert arrays to OpenStruct for attribute access
        result.to_a.map do |row|
          OpenStruct.new(
            country: row[0],
            event_count: row[1],
            unique_ips: row[2]
          )
        end
      end
    rescue StandardError => e
      Rails.logger.error "[EventDdb] Error in top_countries_with_stats: #{e.message}"
      nil
    end

    # Network type stats with formatted output matching controller expectations
    # Returns hash with type keys containing label, networks, events, unique_ips, percentage
    def network_type_stats(start_time)
      service.with_connection do |conn|
        # Get total events for percentage calculation
        total_result = conn.query("SELECT COUNT(*) as total FROM events WHERE timestamp >= ?", start_time)
        total_events = total_result.first&.first || 0

        # Get breakdown by network type
        breakdown = network_type_breakdown(start_time)
        return nil unless breakdown

        # Format results with labels and percentages
        results = {}

        {
          'datacenter' => 'Datacenter',
          'vpn' => 'VPN',
          'proxy' => 'Proxy',
          'standard' => 'Standard'
        }.each do |type, label|
          stats = breakdown[type]
          event_count = stats ? stats["event_count"] : 0

          results[type] = {
            label: label,
            networks: stats ? stats["network_count"] : 0,
            events: event_count,
            unique_ips: stats ? stats["unique_ips"] : 0,
            percentage: total_events > 0 ? ((event_count.to_f / total_events) * 100).round(1) : 0
          }
        end

        results
      end
    rescue StandardError => e
      Rails.logger.error "[EventDdb] Error in network_type_stats: #{e.message}"
      nil
    end

    # Network range traffic statistics
    # Returns comprehensive stats for a given network range ID(s)
    def network_traffic_stats(network_range_ids)
      network_range_ids = Array(network_range_ids)
      return nil if network_range_ids.empty?

      service.with_connection do |conn|
        # Build IN clause with placeholders
        placeholders = network_range_ids.map { "?" }.join(", ")

        # Get all stats in a single query
        result = conn.query(<<~SQL, *network_range_ids)
          SELECT
            COUNT(*) as total_requests,
            COUNT(DISTINCT ip_address) as unique_ips,
            SUM(CASE WHEN waf_action = 0 THEN 1 ELSE 0 END) as blocked_requests,
            SUM(CASE WHEN waf_action = 1 THEN 1 ELSE 0 END) as allowed_requests
          FROM events
          WHERE network_range_id IN (#{placeholders})
        SQL

        stats_row = result.first
        return nil unless stats_row

        {
          total_requests: stats_row[0] || 0,
          unique_ips: stats_row[1] || 0,
          blocked_requests: stats_row[2] || 0,
          allowed_requests: stats_row[3] || 0
        }
      end
    rescue StandardError => e
      Rails.logger.error "[EventDdb] Error in network_traffic_stats: #{e.message}"
      nil
    end

    # Top paths for network range(s)
    def network_top_paths(network_range_ids, limit = 10)
      network_range_ids = Array(network_range_ids)
      return nil if network_range_ids.empty?

      service.with_connection do |conn|
        # Build IN clause with placeholders
        placeholders = network_range_ids.map { "?" }.join(", ")

        result = conn.query(<<~SQL, *network_range_ids, limit)
          SELECT
            request_path,
            COUNT(*) as count
          FROM events
          WHERE network_range_id IN (#{placeholders})
            AND request_path IS NOT NULL
          GROUP BY request_path
          ORDER BY count DESC
          LIMIT ?
        SQL

        result.to_a.map { |row| [row[0], row[1]] }
      end
    rescue StandardError => e
      Rails.logger.error "[EventDdb] Error in network_top_paths: #{e.message}"
      nil
    end

    # Top user agents for network range(s)
    def network_top_user_agents(network_range_ids, limit = 5)
      network_range_ids = Array(network_range_ids)
      return nil if network_range_ids.empty?

      service.with_connection do |conn|
        # Build IN clause with placeholders
        placeholders = network_range_ids.map { "?" }.join(", ")

        result = conn.query(<<~SQL, *network_range_ids, limit)
          SELECT
            user_agent,
            COUNT(*) as count
          FROM events
          WHERE network_range_id IN (#{placeholders})
            AND user_agent IS NOT NULL
          GROUP BY user_agent
          ORDER BY count DESC
          LIMIT ?
        SQL

        result.to_a.map { |row| [row[0], row[1]] }
      end
    rescue StandardError => e
      Rails.logger.error "[EventDdb] Error in network_top_user_agents: #{e.message}"
      nil
    end

    # Full user agent tally for network range(s)
    # Returns hash of user_agent => count for all agents in the network
    def network_agent_tally(network_range_ids)
      network_range_ids = Array(network_range_ids)
      return nil if network_range_ids.empty?

      service.with_connection do |conn|
        # Build IN clause with placeholders
        placeholders = network_range_ids.map { "?" }.join(", ")

        result = conn.query(<<~SQL, *network_range_ids)
          SELECT
            user_agent,
            COUNT(*) as count
          FROM events
          WHERE network_range_id IN (#{placeholders})
            AND user_agent IS NOT NULL
          GROUP BY user_agent
        SQL

        # Convert to hash matching Ruby .tally format
        result.to_a.to_h { |row| [row[0], row[1]] }
      end
    rescue StandardError => e
      Rails.logger.error "[EventDdb] Error in network_agent_tally: #{e.message}"
      nil
    end

    # Suspicious network activity patterns
    # Detects high-volume networks, high deny rates, and distributed companies
    def suspicious_patterns(start_time)
      service.with_connection do |conn|
        # High volume networks (5x average)
        avg_query = conn.query(<<~SQL, start_time)
          SELECT
            AVG(event_count) as avg_events
          FROM (
            SELECT network_range_id, COUNT(*) as event_count
            FROM events
            WHERE timestamp >= ? AND network_range_id IS NOT NULL
            GROUP BY network_range_id
          ) network_stats
        SQL

        avg_events = avg_query.first&.first || 0
        threshold = avg_events * 5

        high_volume = conn.query(<<~SQL, start_time, threshold)
          SELECT
            network_range_id,
            COUNT(*) as event_count
          FROM events
          WHERE timestamp >= ? AND network_range_id IS NOT NULL
          GROUP BY network_range_id
          HAVING COUNT(*) > ?
          ORDER BY event_count DESC
        SQL

        # High deny rate networks (>50% blocked, min 10 requests)
        high_deny = conn.query(<<~SQL, start_time)
          SELECT
            network_range_id,
            SUM(CASE WHEN waf_action = 0 THEN 1 ELSE 0 END) as denied_count,
            COUNT(*) as total_count
          FROM events
          WHERE timestamp >= ? AND network_range_id IS NOT NULL
          GROUP BY network_range_id
          HAVING CAST(SUM(CASE WHEN waf_action = 0 THEN 1 ELSE 0 END) AS FLOAT) / COUNT(*) > 0.5
            AND COUNT(*) >= 10
          ORDER BY denied_count DESC
        SQL

        # Distributed companies (more than 5 unique IPs)
        distributed_companies = conn.query(<<~SQL, start_time)
          SELECT
            company,
            COUNT(DISTINCT ip_address) as ip_count
          FROM events
          WHERE timestamp >= ? AND company IS NOT NULL
          GROUP BY company
          HAVING COUNT(DISTINCT ip_address) > 5
          ORDER BY ip_count DESC
          LIMIT 10
        SQL

        {
          high_volume: {
            count: high_volume.to_a.length,
            networks: high_volume.to_a.map { |row| row[0] } # network_range_id
          },
          high_deny_rate: {
            count: high_deny.to_a.length,
            network_ids: high_deny.to_a.map { |row| row[0] } # network_range_id
          },
          distributed_companies: distributed_companies.to_a.map { |row|
            {
              company: row[0], # company name
              subnets: row[1] # ip_count
            }
          }
        }
      end
    rescue StandardError => e
      Rails.logger.error "[EventDdb] Error in suspicious_patterns: #{e.message}"
      nil
    end
  end
end
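Every EventDdb query rescues StandardError and returns nil, so callers are expected to fall back to PostgreSQL when DuckDB is unavailable. A minimal sketch of that contract from the calling side (the ActiveRecord fallback shown here is illustrative, not part of this diff):

    start_time = 1.hour.ago
    total = EventDdb.count_since(start_time) ||
            Event.where("timestamp >= ?", start_time).count # PostgreSQL fallback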
@@ -158,13 +158,26 @@ class NetworkRange < ApplicationRecord
   end
 
   def mark_as_fetching_api_data!(source)
-    self.network_data ||= {}
-    self.network_data['fetching_status'] ||= {}
-    self.network_data['fetching_status'][source.to_s] = {
-      'started_at' => Time.current.to_f,
-      'job_id' => SecureRandom.hex(8)
-    }
-    save!
+    # Use database-level locking to prevent race conditions
+    transaction do
+      # Reload with lock to get fresh data
+      lock!
+
+      # Double-check that we're not already fetching
+      if is_fetching_api_data?(source)
+        Rails.logger.info "Another job already started fetching #{source} for #{cidr}"
+        return false
+      end
+
+      self.network_data ||= {}
+      self.network_data['fetching_status'] ||= {}
+      self.network_data['fetching_status'][source.to_s] = {
+        'started_at' => Time.current.to_f,
+        'job_id' => SecureRandom.hex(8)
+      }
+      save!
+      true
+    end
   end
 
   def clear_fetching_status!(source)
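The boolean return is the point of the rework: false means another job won the race. A hypothetical caller, with the job class and body assumed for illustration:

    def perform(network_range)
      return unless network_range.mark_as_fetching_api_data!(:ipapi)
      # ... fetch and store API data ...
    ensure
      network_range.clear_fetching_status!(:ipapi)
    end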
@@ -222,9 +235,29 @@ class NetworkRange < ApplicationRecord
   end
 
   def agent_tally
-    # Rails.cache.fetch("#{to_s}:agent_tally", expires_in: 5.minutes) do
-    events.map(&:user_agent).tally
-    # end
+    Rails.cache.fetch("#{cache_key}:agent_tally", expires_in: 5.minutes) do
+      # Use DuckDB for fast agent tally instead of loading all events into memory
+      if persisted? && events_count > 0
+        # Include child network ranges to capture all traffic within this network block
+        network_ids = [id] + child_ranges.pluck(:id)
+
+        # Try DuckDB first for much faster aggregation
+        duckdb_tally = with_duckdb_fallback { EventDdb.network_agent_tally(network_ids) }
+        duckdb_tally || {}
+      else
+        # Virtual network - fallback to PostgreSQL CIDR query
+        events.map(&:user_agent).tally
+      end
+    end
+  end
+
+  # Helper method to try DuckDB first, fall back to PostgreSQL
+  def with_duckdb_fallback(&block)
+    result = yield
+    result.nil? ? nil : result # Return result or nil to trigger fallback
+  rescue StandardError => e
+    Rails.logger.warn "[NetworkRange] DuckDB query failed, falling back to PostgreSQL: #{e.message}"
+    nil # Return nil to trigger fallback
   end
 
   # Geographic lookup
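The helper generalizes to any EventDdb aggregate. A sketch of the intended pattern, with an illustrative ActiveRecord fallback that is not part of this diff:

    def top_paths_tally
      network_ids = [id] + child_ranges.pluck(:id)
      with_duckdb_fallback { EventDdb.network_top_paths(network_ids) } ||
        events.group(:request_path).count.sort_by { |_, count| -count }.first(10)
    end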
@@ -334,6 +367,9 @@ class NetworkRange < ApplicationRecord
   def self.should_fetch_ipapi_for_ip?(ip_address)
     tracking_network = find_or_create_tracking_network_for_ip(ip_address)
 
+    # Check if currently being fetched (prevents duplicate jobs)
+    return false if tracking_network.is_fetching_api_data?(:ipapi)
+
     # Check if /24 has been queried recently
     queried_at = tracking_network.network_data&.dig('ipapi_queried_at')
     return true if queried_at.nil?
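Taken together with the recency check, this guard is meant to run before enqueueing work; a hypothetical call site (the job name is illustrative):

    if NetworkRange.should_fetch_ipapi_for_ip?(ip_address)
      IpapiLookupJob.perform_later(ip_address) # job name is an assumption
    end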
@@ -7,7 +7,7 @@ class Rule < ApplicationRecord
   # Rule enums (prefix needed to avoid rate_limit collision)
   # Canonical WAF action order - aligned with Agent and Event models
-  enum :waf_action, { deny: 0, allow: 1, redirect: 2, challenge: 3, log: 4 }, prefix: :action
+  enum :waf_action, { deny: 0, allow: 1, redirect: 2, challenge: 3, log: 4, add_header: 5 }, prefix: :action
   enum :waf_rule_type, { network: 0, rate_limit: 1, path_pattern: 2 }, prefix: :type
 
   SOURCES = %w[manual auto:scanner_detected auto:rate_limit_exceeded auto:bot_detected imported default manual:surgical_block manual:surgical_exception policy].freeze

@@ -120,6 +120,10 @@ class Rule < ApplicationRecord
     action_challenge?
   end
 
+  def add_header_action?
+    action_add_header?
+  end
+
   # Redirect/challenge convenience methods
   def redirect_url
     metadata_hash['redirect_url']

@@ -137,6 +141,14 @@ class Rule < ApplicationRecord
     metadata&.dig('challenge_message')
   end
 
+  def header_name
+    metadata&.dig('header_name')
+  end
+
+  def header_value
+    metadata&.dig('header_value')
+  end
+
   def related_surgical_rules
     if surgical_block?
       # Find the corresponding exception rule

@@ -421,6 +433,12 @@ class Rule < ApplicationRecord
     if source&.start_with?('auto:') || source == 'default'
       self.user ||= User.find_by(role: 1) # admin role
     end
+
+    # Set default header values for add_header action
+    if add_header_action?
+      self.metadata['header_name'] ||= 'X-Bot-Agent'
+      self.metadata['header_value'] ||= 'Unknown'
+    end
   end
 
   def calculate_priority_for_network_rules

@@ -504,6 +522,13 @@ class Rule < ApplicationRecord
       if challenge_type_value && !%w[captcha javascript proof_of_work].include?(challenge_type_value)
         errors.add(:metadata, "challenge_type must be one of: captcha, javascript, proof_of_work")
       end
+    when "add_header"
+      unless metadata&.dig("header_name").present?
+        errors.add(:metadata, "must include 'header_name' for add_header action")
+      end
+      unless metadata&.dig("header_value").present?
+        errors.add(:metadata, "must include 'header_value' for add_header action")
+      end
     end
   end
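For reference, a console sketch of the new action, assuming the header defaults are applied in a before_validation callback (the callback wiring sits outside this hunk) and omitting the other required Rule attributes:

    rule = Rule.new(waf_action: :add_header, waf_rule_type: :path_pattern, metadata: {})
    rule.valid?
    rule.header_name  # => "X-Bot-Agent" (default)
    rule.header_value # => "Unknown" (default)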
@@ -9,7 +9,7 @@ class WafPolicy < ApplicationRecord
   POLICY_TYPES = %w[country asn company network_type path_pattern].freeze
 
   # Actions - what to do when traffic matches this policy
-  ACTIONS = %w[allow deny redirect challenge].freeze
+  ACTIONS = %w[allow deny redirect challenge add_header].freeze
 
   # Associations
   belongs_to :user

@@ -25,6 +25,7 @@ validate :targets_must_be_array
   validate :validate_targets_by_type
   validate :validate_redirect_configuration, if: :redirect_policy_action?
   validate :validate_challenge_configuration, if: :challenge_policy_action?
+  validate :validate_add_header_configuration, if: :add_header_policy_action?
 
   # Scopes
   scope :enabled, -> { where(enabled: true) }

@@ -95,6 +96,10 @@ validate :targets_must_be_array
     policy_action == 'challenge'
   end
 
+  def add_header_policy_action?
+    policy_action == 'add_header'
+  end
+
   # Lifecycle methods
   def active?
     enabled? && !expired?

@@ -163,7 +168,7 @@ validate :targets_must_be_array
       priority: network_range.prefix_length
     )
 
-    # Handle redirect/challenge specific data
+    # Handle redirect/challenge/add_header specific data
     if redirect_action? && additional_data['redirect_url']
       rule.update!(
         metadata: rule.metadata.merge(

@@ -178,6 +183,13 @@ validate :targets_must_be_array
           challenge_message: additional_data['challenge_message']
         )
       )
+    elsif add_header_action?
+      rule.update!(
+        metadata: rule.metadata.merge(
+          header_name: additional_data['header_name'],
+          header_value: additional_data['header_value']
+        )
+      )
     end
 
     rule

@@ -212,7 +224,7 @@ validate :targets_must_be_array
       priority: 50 # Default priority for path rules
     )
 
-    # Handle redirect/challenge specific data
+    # Handle redirect/challenge/add_header specific data
     if redirect_action? && additional_data['redirect_url']
       rule.update!(
         metadata: rule.metadata.merge(

@@ -227,6 +239,13 @@ validate :targets_must_be_array
           challenge_message: additional_data['challenge_message']
         )
       )
+    elsif add_header_action?
+      rule.update!(
+        metadata: rule.metadata.merge(
+          header_name: additional_data['header_name'],
+          header_value: additional_data['header_value']
+        )
+      )
     end
 
     rule

@@ -346,6 +365,12 @@ validate :targets_must_be_array
     self.targets ||= []
     self.additional_data ||= {}
     self.enabled = true if enabled.nil?
+
+    # Set default header values for add_header action
+    if add_header_policy_action?
+      self.additional_data['header_name'] ||= 'X-Bot-Agent'
+      self.additional_data['header_value'] ||= 'Unknown'
+    end
   end
 
   def targets_must_be_array

@@ -430,6 +455,15 @@ validate :targets_must_be_array
     end
   end
 
+  def validate_add_header_configuration
+    if additional_data['header_name'].blank?
+      errors.add(:additional_data, "must include 'header_name' for add_header action")
+    end
+    if additional_data['header_value'].blank?
+      errors.add(:additional_data, "must include 'header_value' for add_header action")
+    end
+  end
+
   # Matching logic for different policy types
   def matches_country?(network_range)
     country = network_range.country || network_range.inherited_intelligence[:country]
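The policy side mirrors the rule side; a minimal sketch with other required WafPolicy attributes omitted:

    policy = WafPolicy.new(
      policy_action: 'add_header',
      additional_data: { 'header_name' => 'X-Bot-Agent', 'header_value' => 'GPTBot' }
    )
    policy.add_header_policy_action? # => true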
app/services/analytics_duckdb_service.rb (new file, 284 lines)
@@ -0,0 +1,284 @@
# frozen_string_literal: true

# Service for managing DuckDB analytics database
# Provides fast analytical queries on events data using columnar storage
class AnalyticsDuckdbService
  include Singleton

  DUCKDB_PATH = Rails.root.join("storage", "analytics.duckdb").to_s
  BATCH_SIZE = 10_000

  # Execute block with connection, ensuring database and connection are closed afterward
  def with_connection
    db = DuckDB::Database.open(DUCKDB_PATH)
    conn = db.connect
    yield conn
  ensure
    conn&.close
    db&.close
  end

  # Create events table if it doesn't exist (must be called within with_connection block)
  def setup_schema(conn)
    conn.execute(<<~SQL)
      CREATE TABLE IF NOT EXISTS events (
        id BIGINT PRIMARY KEY,
        timestamp TIMESTAMP NOT NULL,
        ip_address VARCHAR,
        network_range_id BIGINT,
        country VARCHAR,
        company VARCHAR,
        asn INTEGER,
        asn_org VARCHAR,
        is_datacenter BOOLEAN,
        is_vpn BOOLEAN,
        is_proxy BOOLEAN,
        waf_action INTEGER,
        request_path VARCHAR,
        user_agent VARCHAR
      )
    SQL

    Rails.logger.info "[DuckDB] Schema setup complete"
  end

  # Get timestamp of oldest event in DuckDB
  # Returns nil if table is empty
  def oldest_event_timestamp
    with_connection do |conn|
      result = conn.query("SELECT MIN(timestamp) as oldest FROM events")
      first_row = result.first
      first_row&.first # Returns the value or nil
    end
  rescue StandardError => e
    Rails.logger.error "[DuckDB] Error getting oldest timestamp: #{e.message}"
    nil
  end

  # Get timestamp of newest event in DuckDB
  # Returns nil if table is empty
  def newest_event_timestamp
    with_connection do |conn|
      result = conn.query("SELECT MAX(timestamp) as newest FROM events")
      first_row = result.first
      first_row&.first # Returns the value or nil
    end
  rescue StandardError => e
    Rails.logger.error "[DuckDB] Error getting newest timestamp: #{e.message}"
    nil
  end

  # Get maximum event ID already synced to DuckDB
  def max_synced_id
    with_connection do |conn|
      result = conn.query("SELECT COALESCE(MAX(id), 0) as max_id FROM events")
      first_row = result.first
      first_row&.first || 0
    end
  rescue StandardError => e
    Rails.logger.error "[DuckDB] Error getting max ID: #{e.message}"
    0
  end

  # Sync new events from PostgreSQL to DuckDB
  # Uses PostgreSQL cursor for memory-efficient streaming
  # Uses Appender API for fast bulk inserts
  # Filters by ID to avoid duplicates
  def sync_new_events(from_timestamp)
    total_synced = 0

    with_connection do |conn|
      # Ensure table exists
      setup_schema(conn)

      # Get max ID already in DuckDB to avoid duplicates
      max_id_result = conn.query("SELECT COALESCE(MAX(id), 0) as max_id FROM events")
      max_id = max_id_result.first&.first || 0
      Rails.logger.info "[DuckDB] Syncing events from #{from_timestamp}, max_id=#{max_id}"

      start_time = Time.current
      appender = nil
      batch_count = 0

      begin
        # Use PostgreSQL cursor for memory-efficient streaming
        Event.where("timestamp >= ? AND id > ?", from_timestamp, max_id)
             .select(
               :id,
               :timestamp,
               :ip_address,
               :network_range_id,
               :country,
               :company,
               :asn,
               :asn_org,
               :is_datacenter,
               :is_vpn,
               :is_proxy,
               :waf_action,
               :request_path,
               :user_agent
             )
             .order(:id)
             .each_row(block_size: BATCH_SIZE) do |event_data|
          # Create new appender for each batch
          if batch_count % BATCH_SIZE == 0
            appender&.close # Close previous appender
            appender = conn.appender("events")
          end

          # Unpack event data from cursor row (Hash from each_row)
          begin
            appender.append_row(
              event_data["id"],
              event_data["timestamp"],
              event_data["ip_address"]&.to_s,
              event_data["network_range_id"],
              event_data["country"],
              event_data["company"],
              event_data["asn"],
              event_data["asn_org"],
              event_data["is_datacenter"],
              event_data["is_vpn"],
              event_data["is_proxy"],
              event_data["waf_action"],
              event_data["request_path"],
              event_data["user_agent"]
            )
          rescue StandardError => e
            Rails.logger.error "[DuckDB] Error appending event #{event_data['id']}: #{e.message}"
            Rails.logger.error "[DuckDB] event_data = #{event_data.inspect}"
            raise
          end

          batch_count += 1
          total_synced += 1

          # Log progress every BATCH_SIZE events
          if batch_count % BATCH_SIZE == 0
            Rails.logger.info "[DuckDB] Synced batch (total: #{total_synced} events)"
          end
        end

        # Close final appender
        appender&.close

        duration = Time.current - start_time
        rate = total_synced / duration if duration > 0
        Rails.logger.info "[DuckDB] Sync complete: #{total_synced} events in #{duration.round(2)}s (~#{rate&.round(0)} events/sec)"
      rescue StandardError => e
        appender&.close rescue nil # Ensure appender is closed on error
        Rails.logger.error "[DuckDB] Error syncing events: #{e.message}"
        Rails.logger.error e.backtrace.join("\n")
        raise # Re-raise to be caught by outer rescue
      end
    end

    total_synced
  rescue StandardError => e
    Rails.logger.error "[DuckDB] Sync failed: #{e.message}"
    0
  end

  # Execute analytical query on DuckDB
  def query(sql, *params)
    with_connection do |conn|
      conn.query(sql, *params)
    end
  rescue StandardError => e
    Rails.logger.error "[DuckDB] Query error: #{e.message}"
    Rails.logger.error "SQL: #{sql}"
    raise
  end

  # Get event count in DuckDB
  def event_count
    with_connection do |conn|
      result = conn.query("SELECT COUNT(*) as count FROM events")
      first_row = result.first
      first_row&.first || 0
    end
  rescue StandardError => e
    Rails.logger.error "[DuckDB] Error getting event count: #{e.message}"
    0
  end

  # Analytics query: Total events since timestamp
  def total_events_since(start_time)
    with_connection do |conn|
      result = conn.query("SELECT COUNT(*) as count FROM events WHERE timestamp >= ?", start_time)
      result.first&.first || 0
    end
  end

  # Analytics query: Event breakdown by WAF action
  def event_breakdown_by_action(start_time)
    with_connection do |conn|
      result = conn.query(<<~SQL, start_time)
        SELECT waf_action, COUNT(*) as count
        FROM events
        WHERE timestamp >= ?
        GROUP BY waf_action
      SQL

      # Convert to hash like PostgreSQL returns (rows are positional arrays)
      result.to_a.to_h { |row| [row[0], row[1]] }
    end
  end

  # Analytics query: Top countries
  def top_countries(start_time, limit = 10)
    with_connection do |conn|
      result = conn.query(<<~SQL, start_time, limit)
        SELECT country, COUNT(*) as count
        FROM events
        WHERE timestamp >= ? AND country IS NOT NULL
        GROUP BY country
        ORDER BY count DESC
        LIMIT ?
      SQL

      result.to_a.map { |row| [row[0], row[1]] }
    end
  end

  # Analytics query: Top blocked IPs
  def top_blocked_ips(start_time, limit = 10)
    with_connection do |conn|
      result = conn.query(<<~SQL, start_time, limit)
        SELECT ip_address, COUNT(*) as count
        FROM events
        WHERE timestamp >= ? AND waf_action = 0
        GROUP BY ip_address
        ORDER BY count DESC
        LIMIT ?
      SQL

      result.to_a.map { |row| [row[0], row[1]] }
    end
  end

  # Analytics query: Hourly timeline (events grouped by hour)
  def hourly_timeline(start_time, end_time)
    with_connection do |conn|
      result = conn.query(<<~SQL, start_time, end_time)
        SELECT
          DATE_TRUNC('hour', timestamp) as hour,
          COUNT(*) as count
        FROM events
        WHERE timestamp >= ? AND timestamp < ?
        GROUP BY hour
        ORDER BY hour
      SQL

      # Convert to hash with Time keys like PostgreSQL
      result.to_a.to_h { |row| [row[0], row[1]] }
    end
  end

  # Close DuckDB connection (for cleanup/testing)
  def close
    @connection&.close
    @connection = nil
  end
end
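Because sync_new_events filters on id > MAX(id) internally, re-running it with an overlapping timestamp is safe. A natural driver is a recurring job; in this sketch the job class, queue, and 30-day backfill window are assumptions, not part of this diff:

    class AnalyticsDuckdbSyncJob < ApplicationJob
      def perform
        service = AnalyticsDuckdbService.instance
        from = service.newest_event_timestamp || 30.days.ago # backfill window assumed
        synced = service.sync_new_events(from)
        Rails.logger.info "[DuckDB] Recurring sync added #{synced} events"
      end
    end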
app/services/bot_network_range_importer.rb (new file, 573 lines)
@@ -0,0 +1,573 @@
# frozen_string_literal: true

# BotNetworkRangeImporter - Service for importing official bot network ranges
#
# Imports network ranges from official bot provider sources like:
# - Amazon AWS: https://ip-ranges.amazonaws.com/ip-ranges.json
# - Google: Official crawler IP lists
# - Microsoft/Bing: Bot network ranges
# - Anthropic: Service network ranges
# - OpenAI: Service network ranges
class BotNetworkRangeImporter
  class ImportError < StandardError; end

  # Official sources for bot network ranges
  BOT_SOURCES = {
    amazon_aws: {
      name: 'Amazon AWS',
      url: 'https://ip-ranges.amazonaws.com/ip-ranges.json',
      format: :json,
      parser: :parse_aws_ranges,
      description: 'Official AWS IP ranges including Amazonbot and other services'
    },
    google: {
      name: 'Google',
      # Note: These URLs may need to be updated based on current Google documentation
      urls: [
        'https://developers.google.com/search/docs/files/googlebot.json',
        'https://developers.google.com/search/docs/files/special-crawlers.json'
      ],
      format: :json,
      parser: :parse_google_ranges,
      description: 'Googlebot and other Google crawler IP ranges'
    },
    microsoft_bing: {
      name: 'Microsoft Bing',
      # Note: Microsoft may require web scraping or API access
      url: 'https://www.bing.com/toolbox/bingbot.json',
      format: :json,
      parser: :parse_microsoft_ranges,
      description: 'Bingbot and other Microsoft crawler IP ranges'
    },
    anthropic: {
      name: 'Anthropic Claude',
      # Note: Anthropic ranges may need manual updates or different approach
      url: 'https://docs.anthropic.com/claude/reference/ip_ranges',
      format: :html,
      parser: :parse_anthropic_ranges,
      description: 'Anthropic Claude API service IP ranges'
    },
    openai_searchbot: {
      name: 'OpenAI SearchBot',
      url: 'https://openai.com/searchbot.json',
      format: :json,
      parser: :parse_openai_ranges,
      description: 'OpenAI SearchBot for ChatGPT search features'
    },
    openai_chatgpt_user: {
      name: 'OpenAI ChatGPT-User',
      url: 'https://openai.com/chatgpt-user.json',
      format: :json,
      parser: :parse_openai_ranges,
      description: 'OpenAI ChatGPT-User for user actions in ChatGPT and Custom GPTs'
    },
    openai_gptbot: {
      name: 'OpenAI GPTBot',
      url: 'https://openai.com/gptbot.json',
      format: :json,
      parser: :parse_openai_ranges,
      description: 'OpenAI GPTBot for training AI foundation models'
    },
    cloudflare: {
      name: 'Cloudflare',
      urls: [
        'https://www.cloudflare.com/ips-v4',
        'https://www.cloudflare.com/ips-v6'
      ],
      format: :text,
      parser: :parse_cloudflare_ranges,
      description: 'Cloudflare network ranges including their crawlers and services'
    },
    facebook: {
      name: 'Facebook/Meta',
      url: 'https://developers.facebook.com/docs/sharing/webmasters/crawler/',
      format: :html,
      parser: :parse_facebook_ranges,
      description: 'Facebook/Meta crawlers and bots'
    },
    applebot: {
      name: 'Applebot',
      url: 'https://support.apple.com/en-us/HT204683',
      format: :html,
      parser: :parse_applebot_ranges,
      description: 'Applebot crawler for Apple search and Siri'
    },
    duckduckgo: {
      name: 'DuckDuckBot',
      url: 'https://help.duckduckgo.com/duckduckgo-help-pages/results/duckduckbot/',
      format: :html,
      parser: :parse_duckduckgo_ranges,
      description: 'DuckDuckGo search crawler'
    }
  }.freeze

  def self.import_from_source(source_key, options = {})
    source = BOT_SOURCES[source_key.to_sym]
    raise ImportError, "Unknown source: #{source_key}" unless source

    puts "Importing bot network ranges from #{source[:name]}..."

    case source[:parser]
    when :parse_aws_ranges
      parse_aws_ranges(source, options)
    when :parse_google_ranges
      parse_google_ranges(source, options)
    when :parse_microsoft_ranges
      parse_microsoft_ranges(source, options)
    when :parse_anthropic_ranges
      parse_anthropic_ranges(source, options)
    when :parse_openai_ranges
      parse_openai_ranges(source, options)
    when :parse_cloudflare_ranges
      parse_cloudflare_ranges(source, options)
    when :parse_facebook_ranges
      parse_facebook_ranges(source, options)
    when :parse_applebot_ranges
      parse_applebot_ranges(source, options)
    when :parse_duckduckgo_ranges
      parse_duckduckgo_ranges(source, options)
    else
      raise ImportError, "Unknown parser: #{source[:parser]}"
    end
  end

  def self.import_all_sources(options = {})
    results = {}

    BOT_SOURCES.each do |source_key, source|
      puts "\n" + "=" * 50
      puts "Processing #{source[:name]}..."
      puts "=" * 50

      begin
        results[source_key] = import_from_source(source_key, options)
      rescue => e
        Rails.logger.error "Failed to import from #{source[:name]}: #{e.message}"
        results[source_key] = { error: e.message, imported: 0 }
      end
    end

    puts "\n" + "=" * 50
    puts "Import Summary"
    puts "=" * 50

    results.each do |source, result|
      if result[:error]
        puts "#{source}: FAILED - #{result[:error]}"
      else
        puts "#{source}: SUCCESS - #{result[:imported]} ranges imported"
      end
    end

    results
  end

  private

  # Amazon AWS IP ranges parser
  def self.parse_aws_ranges(source, options = {})
    require 'net/http'
    require 'uri'

    uri = URI.parse(source[:url])
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = true
    http.read_timeout = 30

    response = http.get(uri.request_uri)
    raise ImportError, "Failed to fetch AWS IP ranges: #{response.code}" unless response.code == '200'

    data = JSON.parse(response.body)
    imported_count = 0
    batch_size = options[:batch_size] || 1000
    batch = []

    # Filter for relevant services (can be customized)
    relevant_services = options[:aws_services] || ['AMAZON', 'ROUTE53', 'EC2', 'CLOUDFRONT']

    data['prefixes'].each do |prefix|
      # Focus on relevant services and regions
      next unless relevant_services.include?(prefix['service'])

      network_range = {
        network: prefix['ip_prefix'],
        source: 'bot_import_amazon_aws',
        asn: nil, # AWS doesn't provide ASN in this feed
        asn_org: 'Amazon Web Services',
        company: 'Amazon',
        country: nil,
        is_datacenter: true,
        is_proxy: false,
        is_vpn: false,
        additional_data: {
          aws_service: prefix['service'],
          aws_region: prefix['region'],
          aws_network_border_group: prefix['network_border_group'],
          import_date: Time.current.iso8601
        }.to_json
      }

      batch << network_range

      if batch.size >= batch_size
        imported_count += import_batch(batch, 'Amazon AWS')
        batch = []
        puts "Imported #{imported_count} AWS ranges..."
      end
    end

    # Import remaining records
    if batch.any?
      imported_count += import_batch(batch, 'Amazon AWS')
    end

    puts "Amazon AWS import completed: #{imported_count} ranges imported"
    { imported: imported_count, source: 'Amazon AWS' }
  rescue Net::ReadTimeout, Net::OpenTimeout => e
    raise ImportError, "Network timeout while fetching AWS ranges: #{e.message}"
  rescue JSON::ParserError => e
    raise ImportError, "Failed to parse AWS JSON response: #{e.message}"
  end
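For context, entries in the AWS ip-ranges.json feed have roughly this shape (abridged), which is where ip_prefix, service, region, and network_border_group come from:

    # { "prefixes": [
    #     { "ip_prefix": "3.5.140.0/22",
    #       "region": "ap-northeast-2",
    #       "service": "AMAZON",
    #       "network_border_group": "ap-northeast-2" } ] }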
  # Google crawler IP ranges parser
  def self.parse_google_ranges(source, options = {})
    imported_count = 0

    # Try each potential URL
    urls = Array(source[:urls] || source[:url])

    urls.each do |url|
      begin
        puts "Attempting to fetch Google ranges from: #{url}"

        uri = URI.parse(url)
        http = Net::HTTP.new(uri.host, uri.port)
        http.use_ssl = true
        http.read_timeout = 30

        response = http.get(uri.request_uri)
        next unless response.code == '200'

        data = JSON.parse(response.body)

        batch_size = options[:batch_size] || 1000
        batch = []

        # Parse Google crawler format (varies by file type)
        if data.is_a?(Array)
          data.each do |entry|
            next unless entry['cidr'] || entry['prefix']

            network_range = {
              network: entry['cidr'] || entry['prefix'],
              source: 'bot_import_google',
              asn: nil,
              asn_org: 'Google LLC',
              company: 'Google',
              country: nil,
              is_datacenter: true,
              is_proxy: false,
              is_vpn: false,
              additional_data: {
                crawler_type: entry['crawler_type'] || 'unknown',
                user_agent: entry['user_agent'],
                import_date: Time.current.iso8601
              }.to_json
            }

            batch << network_range

            if batch.size >= batch_size
              imported_count += import_batch(batch, 'Google')
              batch = []
              puts "Imported #{imported_count} Google ranges..."
            end
          end
        end

        # Import remaining records
        if batch.any?
          imported_count += import_batch(batch, 'Google')
        end

        puts "Google import completed: #{imported_count} ranges imported"
        return { imported: imported_count, source: 'Google' }
      rescue => e
        Rails.logger.warn "Failed to fetch from #{url}: #{e.message}"
        next
      end
    end

    raise ImportError, "Failed to fetch Google crawler ranges from any URL"
  end

  # Microsoft Bing crawler IP ranges parser
  def self.parse_microsoft_ranges(source, options = {})
    # Microsoft requires special handling as they may not provide direct JSON
    # This is a placeholder implementation

    puts "Microsoft Bing crawler import requires manual configuration or web scraping"
    puts "Refer to: https://www.bing.com/webmaster/help/which-crawlers-does-bing-use"

    {
      imported: 0,
      source: 'Microsoft Bing',
      note: 'Manual configuration required - Microsoft does not provide direct IP range feeds'
    }
  end

  # Anthropic service IP ranges parser
  def self.parse_anthropic_ranges(source, options = {})
    # Anthropic ranges may need to be manually configured
    # This is a placeholder implementation

    puts "Anthropic Claude service ranges require manual configuration"
    puts "Refer to: https://docs.anthropic.com/claude/reference/ip_ranges"

    {
      imported: 0,
      source: 'Anthropic',
      note: 'Manual configuration required - Anthropic does not provide automated IP range feeds'
    }
  end

  # OpenAI crawler IP ranges parser
  def self.parse_openai_ranges(source, options = {})
    require 'net/http'
    require 'uri'

    uri = URI.parse(source[:url])
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = true
    http.read_timeout = 30

    response = http.get(uri.request_uri)
    raise ImportError, "Failed to fetch OpenAI IP ranges: #{response.code}" unless response.code == '200'

    data = JSON.parse(response.body)
    imported_count = 0
    batch_size = options[:batch_size] || 1000
    batch = []

    # Determine crawler type from source name
    crawler_type = source[:name].gsub('OpenAI ', '').downcase

    data.each do |entry|
      # OpenAI provides IP ranges as either CIDR notation or single IPs
      ip_range = entry['cidr'] || entry['ip_prefix'] || entry['ip']
      next unless ip_range

      # Convert single IPs to /32
      network = ip_range.include?('/') ? ip_range : "#{ip_range}/32"

      network_range = {
        network: network,
        source: "bot_import_openai_#{crawler_type}",
        asn: nil,
        asn_org: 'OpenAI',
        company: 'OpenAI',
        country: nil,
        is_datacenter: true,
        is_proxy: false,
        is_vpn: false,
        additional_data: {
          crawler_type: crawler_type,
          crawler_purpose: crawler_purpose(crawler_type),
          user_agent: openai_user_agent(crawler_type),
          import_date: Time.current.iso8601,
          source_url: source[:url]
        }.to_json
      }

      batch << network_range

      if batch.size >= batch_size
        imported_count += import_batch(batch, "OpenAI #{crawler_type}")
        batch = []
        puts "Imported #{imported_count} OpenAI #{crawler_type} ranges..."
      end
    end

    # Import remaining records
    if batch.any?
      imported_count += import_batch(batch, "OpenAI #{crawler_type}")
    end

    puts "OpenAI #{crawler_type} import completed: #{imported_count} ranges imported"
    { imported: imported_count, source: "OpenAI #{crawler_type}" }
  rescue Net::ReadTimeout, Net::OpenTimeout => e
    raise ImportError, "Network timeout while fetching OpenAI #{crawler_type} ranges: #{e.message}"
  rescue JSON::ParserError => e
    raise ImportError, "Failed to parse OpenAI #{crawler_type} JSON response: #{e.message}"
  end
def self.import_batch(batch_data, source_name)
|
||||||
|
# Check for existing ranges to avoid duplicates
|
||||||
|
existing_networks = NetworkRange.where(network: batch_data.map { |d| d[:network] }).pluck(:network)
|
||||||
|
new_ranges = batch_data.reject { |d| existing_networks.include?(d[:network]) }
|
||||||
|
|
||||||
|
if new_ranges.any?
|
||||||
|
NetworkRange.insert_all(new_ranges)
|
||||||
|
puts "Imported #{new_ranges.size} new #{source_name} ranges (#{batch_data.size - new_ranges.size} duplicates skipped)"
|
||||||
|
else
|
||||||
|
puts "No new #{source_name} ranges to import (all duplicates)"
|
||||||
|
end
|
||||||
|
|
||||||
|
new_ranges.size
|
||||||
|
rescue => e
|
||||||
|
Rails.logger.error "Failed to import #{source_name} batch: #{e.message}"
|
||||||
|
|
||||||
|
# Fallback to individual imports
|
||||||
|
imported = 0
|
||||||
|
new_ranges.each do |data|
|
||||||
|
begin
|
||||||
|
NetworkRange.create!(data)
|
||||||
|
imported += 1
|
||||||
|
rescue => individual_error
|
||||||
|
Rails.logger.error "Failed to import individual #{source_name} record: #{individual_error.message}"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
imported
|
||||||
|
end
|
||||||
|
|
||||||
|
# Helper method to determine crawler purpose based on type
|
||||||
|
def self.crawler_purpose(crawler_type)
|
||||||
|
case crawler_type
|
||||||
|
when 'searchbot'
|
||||||
|
'Used to link to and surface websites in search results in ChatGPT\'s search features'
|
||||||
|
when 'chatgpt-user'
|
||||||
|
'User actions in ChatGPT and Custom GPTs, including GPT Actions'
|
||||||
|
when 'gptbot'
|
||||||
|
'Used to crawl content for training OpenAI\'s generative AI foundation models'
|
||||||
|
else
|
||||||
|
'Unknown purpose'
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Helper method to get OpenAI user agent strings
|
||||||
|
def self.openai_user_agent(crawler_type)
|
||||||
|
case crawler_type
|
||||||
|
when 'searchbot'
|
||||||
|
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot'
|
||||||
|
when 'chatgpt-user'
|
||||||
|
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot'
|
||||||
|
when 'gptbot'
|
||||||
|
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; GPTBot/1.1; +https://openai.com/gptbot'
|
||||||
|
else
|
||||||
|
'Unknown user agent'
|
||||||
|
end
|
||||||
|
end

# Cloudflare IP ranges parser
def self.parse_cloudflare_ranges(source, options = {})
  require 'net/http'
  require 'uri'

  imported_count = 0
  urls = Array(source[:urls])
  batch_size = options[:batch_size] || 1000
  batch = []

  urls.each do |url|
    begin
      puts "Fetching Cloudflare ranges from: #{url}"

      uri = URI.parse(url)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true
      http.read_timeout = 30

      response = http.get(uri.request_uri)
      raise ImportError, "Failed to fetch Cloudflare ranges: #{response.code}" unless response.code == '200'

      # Cloudflare provides plain text CIDR lists
      lines = response.body.split("\n")
      ip_version = url.include?('ips-v4') ? 4 : 6

      lines.each do |line|
        line = line.strip
        next if line.empty? || line.start_with?('#')

        # Validate CIDR format
        next unless line.match?(/\A[0-9a-fA-F:.]+\/\d+\z/)

        network_range = {
          network: line,
          source: 'bot_import_cloudflare',
          asn: nil,
          asn_org: 'Cloudflare',
          company: 'Cloudflare',
          country: nil,
          is_datacenter: true,
          is_proxy: false,
          is_vpn: false,
          additional_data: {
            ip_version: ip_version,
            import_date: Time.current.iso8601,
            source_url: url,
            service_type: 'cdn_and_security'
          }.to_json
        }

        batch << network_range

        if batch.size >= batch_size
          imported_count += import_batch(batch, 'Cloudflare')
          batch = []
          puts "Imported #{imported_count} Cloudflare ranges..."
        end
      end

    rescue => e
      Rails.logger.warn "Failed to fetch Cloudflare ranges from #{url}: #{e.message}"
      next
    end
  end

  # Import remaining records
  if batch.any?
    imported_count += import_batch(batch, 'Cloudflare')
  end

  puts "Cloudflare import completed: #{imported_count} ranges imported"
  { imported: imported_count, source: 'Cloudflare' }
end
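
For reference, a plausible source entry for the parser above; the actual config lives elsewhere in this repo, but the URLs are Cloudflare's published plain-text list endpoints, which is what the ips-v4 check keys off:

  # Plausible input for parse_cloudflare_ranges (assumed config shape):
  cloudflare_source = {
    name: 'Cloudflare',
    urls: %w[https://www.cloudflare.com/ips-v4 https://www.cloudflare.com/ips-v6]
  }
  parse_cloudflare_ranges(cloudflare_source, batch_size: 1000)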

# Facebook/Meta crawler ranges parser (placeholder)
def self.parse_facebook_ranges(source, options = {})
  puts "Facebook/Meta crawler ranges require web scraping or manual configuration"
  puts "Refer to: https://developers.facebook.com/docs/sharing/webmasters/crawler/"

  {
    imported: 0,
    source: 'Facebook/Meta',
    note: 'Manual configuration required - Facebook does not provide automated IP range feeds'
  }
end

# Applebot crawler ranges parser (placeholder)
def self.parse_applebot_ranges(source, options = {})
  puts "Applebot ranges require web scraping or manual configuration"
  puts "Refer to: https://support.apple.com/en-us/HT204683"

  {
    imported: 0,
    source: 'Applebot',
    note: 'Manual configuration required - Apple does not provide automated IP range feeds'
  }
end

# DuckDuckBot crawler ranges parser (placeholder)
def self.parse_duckduckgo_ranges(source, options = {})
  puts "DuckDuckBot ranges require web scraping or manual configuration"
  puts "Refer to: https://help.duckduckgo.com/duckduckgo-help-pages/results/duckduckbot/"

  {
    imported: 0,
    source: 'DuckDuckBot',
    note: 'Manual configuration required - DuckDuckGo does not provide automated IP range feeds'
  }
end
end
@@ -53,4 +53,107 @@ class Ipapi
       next
     end
   end
+
+  # Parse company/datacenter network range from IPAPI data
+  # Handles "X.X.X.X - Y.Y.Y.Y" format and converts to CIDR
+  def self.parse_company_network_range(ipapi_data)
+    # Try company.network first, then datacenter.network
+    network_range = ipapi_data.dig('company', 'network') || ipapi_data.dig('datacenter', 'network')
+    return nil if network_range.blank?
+
+    # Parse "X.X.X.X - Y.Y.Y.Y" format
+    if network_range.include?(' - ')
+      start_ip_str, end_ip_str = network_range.split(' - ').map(&:strip)
+
+      begin
+        start_ip = IPAddr.new(start_ip_str)
+        end_ip = IPAddr.new(end_ip_str)
+
+        # Calculate the number of IPs in the range
+        num_ips = end_ip.to_i - start_ip.to_i + 1
+
+        # Calculate prefix length from number of IPs
+        # num_ips = 2^(32 - prefix_length) for IPv4
+        prefix_length = 32 - Math.log2(num_ips).to_i
+
+        # Verify it's a valid CIDR block (power of 2)
+        if 2**(32 - prefix_length) == num_ips
+          cidr = "#{start_ip_str}/#{prefix_length}"
+          Rails.logger.debug "Parsed company network range: #{network_range} -> #{cidr}"
+          return cidr
+        else
+          Rails.logger.warn "Network range #{network_range} is not a valid CIDR block (#{num_ips} IPs)"
+          return nil
+        end
+      rescue IPAddr::InvalidAddressError => e
+        Rails.logger.error "Invalid IP in company network range: #{network_range} (#{e.message})"
+        return nil
+      end
+    elsif network_range.include?('/')
+      # Already in CIDR format
+      return network_range
+    else
+      Rails.logger.warn "Unknown network range format: #{network_range}"
+      return nil
+    end
+  end
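
Worked example of the prefix arithmetic above, using the range that appears in the ipapi fixture added later in this diff:

  # "91.84.96.0 - 91.84.127.255" (from test/fixtures/files/ipapi_91_84_96_0.json):
  #   num_ips       = end - start + 1 = 8192 = 2^13
  #   prefix_length = 32 - log2(8192) = 19        =>  "91.84.96.0/19"
  # A 100-IP range like "10.0.0.0 - 10.0.0.99" is not a power of two, so it
  # is logged and rejected rather than forced into a wrong CIDR. Note the
  # check only verifies the count; it assumes start_ip is aligned to that
  # power-of-two boundary.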
+
+  # Populate NetworkRange attributes from IPAPI data
+  def self.populate_network_attributes(network_range, ipapi_data)
+    network_range.asn = ipapi_data.dig('asn', 'asn')
+    network_range.asn_org = ipapi_data.dig('asn', 'org') || ipapi_data.dig('company', 'name')
+    network_range.company = ipapi_data.dig('company', 'name')
+    network_range.country = ipapi_data.dig('location', 'country_code')
+    network_range.is_datacenter = ipapi_data['is_datacenter'] || false
+    network_range.is_vpn = ipapi_data['is_vpn'] || false
+    network_range.is_proxy = ipapi_data['is_proxy'] || false
+  end
+
+  # Process IPAPI data and create network ranges
+  # Returns array of created/updated NetworkRange objects
+  def self.process_ipapi_data(ipapi_data, tracking_network)
+    created_networks = []
+
+    # Extract and create company/datacenter network range if present
+    company_network_cidr = parse_company_network_range(ipapi_data)
+    if company_network_cidr.present?
+      company_range = NetworkRange.find_or_create_by(network: company_network_cidr) do |nr|
+        nr.source = 'api_imported'
+        nr.creation_reason = "Company allocation from IPAPI for #{tracking_network.cidr}"
+      end
+
+      # Always update attributes (whether new or existing)
+      populate_network_attributes(company_range, ipapi_data)
+      company_range.set_network_data(:ipapi, ipapi_data)
+      company_range.last_api_fetch = Time.current
+      company_range.save!
+
+      created_networks << company_range
+      Rails.logger.info "Created/updated company network: #{company_range.cidr}"
+    end
+
+    # Extract and create ASN route network if present
+    ipapi_route = ipapi_data.dig('asn', 'route')
+    if ipapi_route.present? && ipapi_route != tracking_network.cidr
+      route_network = NetworkRange.find_or_create_by(network: ipapi_route) do |nr|
+        nr.source = 'api_imported'
+        nr.creation_reason = "BGP route from IPAPI lookup for #{tracking_network.cidr}"
+      end
+
+      # Always update attributes (whether new or existing)
+      populate_network_attributes(route_network, ipapi_data)
+      route_network.set_network_data(:ipapi, ipapi_data)
+      route_network.last_api_fetch = Time.current
+      route_network.save!
+
+      created_networks << route_network
+      Rails.logger.info "Created/updated BGP route network: #{route_network.cidr}"
+    end
+
+    # Return both the created networks and the broadest CIDR for deduplication
+    {
+      networks: created_networks,
+      broadest_cidr: company_network_cidr.presence || ipapi_route || tracking_network.cidr
+    }
+  end
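
On the return value above: broadest_cidr prefers the company allocation on the assumption that it is normally a supernet of the announced BGP route (in the ipapi fixture later in this diff, the /19 company block covers the /24 route), letting callers deduplicate follow-up lookups against the widest block.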
 end
@@ -141,8 +141,11 @@
                   class: "text-blue-600 hover:text-blue-800 hover:underline font-mono font-medium" %>
           </div>
           <div class="text-xs text-gray-500">
-            <% if network.country.present? %>
-              🏳️ <%= network.country %>
+            <% if network.display_country.present? %>
+              🏳️ <%= network.display_country %>
+              <% if network.has_inherited_data? && network.display_country != network.country %>
+                <span class="text-blue-600" title="Inherited from parent network">*</span>
+              <% end %>
             <% end %>
             <% if network.asn.present? %>
               • ASN <%= network.asn %>
@@ -150,7 +153,15 @@
           </div>
         </td>
         <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
-          <%= network.company || 'Unknown' %>
+          <div>
+            <%= network.display_company || 'Unknown' %>
+            <% if network.has_inherited_data? %>
+              <div class="text-xs text-blue-600">
+                from <%= link_to network.inherited_from, network_range_path(NetworkRange.find_by(network: network.inherited_from)),
+                     class: "text-blue-600 hover:text-blue-800 hover:underline" %>
+              </div>
+            <% end %>
+          </div>
         </td>
         <td class="px-6 py-4 whitespace-nowrap text-sm">
           <% if network.is_datacenter? %>
171	app/views/bot_network_ranges/index.html.erb	Normal file
@@ -0,0 +1,171 @@
<% content_for :title, "Bot Network Ranges" %>

<div class="max-w-7xl mx-auto px-4 py-8">
  <!-- Header -->
  <div class="mb-8">
    <h1 class="text-3xl font-bold text-gray-900 mb-2">Bot Network Ranges</h1>
    <p class="text-gray-600">Import and manage official network ranges for search crawlers and API bots</p>
  </div>

  <!-- Available Sources -->
  <div class="bg-white shadow rounded-lg mb-8">
    <div class="px-6 py-4 border-b border-gray-200">
      <h2 class="text-lg font-semibold text-gray-900">Available Sources</h2>
    </div>
    <div class="p-6">
      <div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6">
        <% @bot_sources.each do |key, source| %>
          <div class="border rounded-lg p-4 hover:bg-gray-50 transition-colors">
            <div class="flex items-start justify-between mb-2">
              <h3 class="font-medium text-gray-900"><%= source[:name] %></h3>
              <span class="px-2 py-1 text-xs font-medium rounded-full <%= source[:url] ? 'bg-green-100 text-green-800' : 'bg-yellow-100 text-yellow-800' %>">
                <%= source[:url] ? 'Available' : 'Manual' %>
              </span>
            </div>
            <p class="text-sm text-gray-600 mb-4"><%= source[:description] %></p>

            <div class="flex flex-wrap gap-2">
              <%= form_with url: import_bot_network_ranges_path, method: :post, class: "inline" do |f| %>
                <%= hidden_field_tag :source, key %>
                <%= f.submit "Import Now",
                    class: "px-3 py-1 text-xs font-medium text-white bg-blue-600 rounded hover:bg-blue-700 transition-colors disabled:opacity-50",
                    disabled: !source[:url] %>
              <% end %>

              <%= form_with url: import_async_bot_network_ranges_path, method: :post, class: "inline" do |f| %>
                <%= hidden_field_tag :source, key %>
                <%= f.submit "Import Async",
                    class: "px-3 py-1 text-xs font-medium text-white bg-purple-600 rounded hover:bg-purple-700 transition-colors disabled:opacity-50",
                    disabled: !source[:url] %>
              <% end %>

              <%= link_to "View", bot_network_range_path(key),
                  class: "px-3 py-1 text-xs font-medium text-gray-700 bg-gray-200 rounded hover:bg-gray-300 transition-colors" %>
            </div>
          </div>
        <% end %>
      </div>
    </div>
  </div>

  <!-- Batch Import -->
  <div class="bg-white shadow rounded-lg mb-8">
    <div class="px-6 py-4 border-b border-gray-200">
      <h2 class="text-lg font-semibold text-gray-900">Batch Import</h2>
    </div>
    <div class="p-6">
      <p class="text-gray-600 mb-4">Import from all available sources (this may take several minutes).</p>

      <%= form_with url: import_all_bot_network_ranges_path, method: :post do |f| %>
        <div class="flex items-center gap-4">
          <%= f.submit "Import All Sources",
              class: "px-6 py-2 font-medium text-white bg-green-600 rounded hover:bg-green-700 transition-colors",
              data: { turbo_confirm: "This will import from all available sources and may take several minutes. Continue?" } %>
        </div>
      <% end %>
    </div>
  </div>

  <!-- Recent Imports -->
  <% if @recent_imports.any? %>
    <div class="bg-white shadow rounded-lg mb-8">
      <div class="px-6 py-4 border-b border-gray-200">
        <h2 class="text-lg font-semibold text-gray-900">Recent Imports</h2>
      </div>
      <div class="overflow-x-auto">
        <table class="min-w-full divide-y divide-gray-200">
          <thead class="bg-gray-50">
            <tr>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Source</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Status</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Records</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Date</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Notes</th>
            </tr>
          </thead>
          <tbody class="bg-white divide-y divide-gray-200">
            <% @recent_imports.each do |import| %>
              <tr>
                <td class="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
                  <%= import.source.titleize %>
                </td>
                <td class="px-6 py-4 whitespace-nowrap">
                  <span class="px-2 inline-flex text-xs leading-5 font-semibold rounded-full <%= import.status == 'completed' ? 'bg-green-100 text-green-800' : 'bg-yellow-100 text-yellow-800' %>">
                    <%= import.status.titleize %>
                  </span>
                </td>
                <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                  <%= import.records_processed&.to_s || '0' %>
                </td>
                <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                  <%= import.created_at.strftime('%Y-%m-%d %H:%M') %>
                </td>
                <td class="px-6 py-4 text-sm text-gray-500">
                  <%= import.notes %>
                </td>
              </tr>
            <% end %>
          </tbody>
        </table>
      </div>
    </div>
  <% end %>

  <!-- Recent Bot Network Ranges -->
  <% if @bot_network_ranges.any? %>
    <div class="bg-white shadow rounded-lg">
      <div class="px-6 py-4 border-b border-gray-200">
        <h2 class="text-lg font-semibold text-gray-900">Recently Imported Bot Ranges</h2>
      </div>
      <div class="overflow-x-auto">
        <table class="min-w-full divide-y divide-gray-200">
          <thead class="bg-gray-50">
            <tr>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Network</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Source</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Company</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Created</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Details</th>
            </tr>
          </thead>
          <tbody class="bg-white divide-y divide-gray-200">
            <% @bot_network_ranges.each do |range| %>
              <tr>
                <td class="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
                  <%= range.network %>
                </td>
                <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                  <%= range.source.gsub('bot_import_', '').titleize %>
                </td>
                <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                  <%= range.company || 'Unknown' %>
                </td>
                <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                  <%= range.created_at.strftime('%Y-%m-%d %H:%M') %>
                </td>
                <td class="px-6 py-4 text-sm text-gray-500">
                  <% if range.additional_data.present? %>
                    <% data = JSON.parse(range.additional_data) rescue {} %>
                    <% if data['crawler_type'] %>
                      <span class="px-2 py-1 text-xs font-medium rounded bg-blue-100 text-blue-800">
                        <%= data['crawler_type'].titleize %>
                      </span>
                    <% end %>
                    <% if data['aws_service'] %>
                      <span class="px-2 py-1 text-xs font-medium rounded bg-orange-100 text-orange-800">
                        <%= data['aws_service'] %>
                      </span>
                    <% end %>
                  <% end %>
                </td>
              </tr>
            <% end %>
          </tbody>
        </table>
      </div>
    </div>
  <% end %>
</div>

<!-- Real-time updates via Turbo Streams -->
<turbo-stream-source src="/cable" channel="BotImportsChannel"></turbo-stream-source>
175	app/views/bot_network_ranges/show.html.erb	Normal file
@@ -0,0 +1,175 @@
<% content_for :title, "#{@source_name} Network Ranges" %>

<div class="max-w-7xl mx-auto px-4 py-8">
  <!-- Header -->
  <div class="mb-8">
    <div class="flex items-center justify-between">
      <div>
        <h1 class="text-3xl font-bold text-gray-900 mb-2"><%= @source_name %> Network Ranges</h1>
        <p class="text-gray-600">Network ranges imported from <%= @source_name %> official sources</p>
      </div>
      <div class="flex space-x-3">
        <%= link_to "Back to Sources", bot_network_ranges_path,
            class: "px-4 py-2 text-sm font-medium text-gray-700 bg-gray-200 rounded hover:bg-gray-300 transition-colors" %>

        <%= form_with url: bot_network_range_path(params[:source]), method: :delete, class: "inline" do |f| %>
          <%= f.submit "Delete All Ranges",
              class: "px-4 py-2 text-sm font-medium text-white bg-red-600 rounded hover:bg-red-700 transition-colors",
              data: { turbo_confirm: "Are you sure you want to delete all #{@source_name} network ranges? This action cannot be undone." } %>
        <% end %>
      </div>
    </div>
  </div>

  <!-- Statistics -->
  <% if @import_stats.any? %>
    <div class="bg-white shadow rounded-lg mb-8">
      <div class="px-6 py-4 border-b border-gray-200">
        <h2 class="text-lg font-semibold text-gray-900">Import Statistics</h2>
      </div>
      <div class="p-6">
        <div class="grid grid-cols-1 md:grid-cols-3 gap-6">
          <% @import_stats.each do |source, count| %>
            <div class="text-center">
              <div class="text-3xl font-bold text-blue-600"><%= count %></div>
              <div class="text-sm text-gray-600 mt-1"><%= source.gsub('bot_import_', '').titleize %></div>
            </div>
          <% end %>
        </div>
      </div>
    </div>
  <% end %>

  <!-- Network Ranges Table -->
  <div class="bg-white shadow rounded-lg">
    <div class="px-6 py-4 border-b border-gray-200">
      <div class="flex items-center justify-between">
        <h2 class="text-lg font-semibold text-gray-900">Network Ranges</h2>
        <div class="text-sm text-gray-500">
          Showing <%= @network_ranges.offset_value + 1 %> to <%= [@network_ranges.offset_value + @network_ranges.size, @network_ranges.total_count].min %>
          of <%= @network_ranges.total_count %> ranges
        </div>
      </div>
    </div>

    <div class="overflow-x-auto">
      <table class="min-w-full divide-y divide-gray-200">
        <thead class="bg-gray-50">
          <tr>
            <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Network</th>
            <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Source</th>
            <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Company</th>
            <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Country</th>
            <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Created</th>
            <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Details</th>
          </tr>
        </thead>
        <tbody class="bg-white divide-y divide-gray-200">
          <% @network_ranges.each do |range| %>
            <tr class="hover:bg-gray-50">
              <td class="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
                <%= link_to range.network, network_range_path(range), class: "text-blue-600 hover:text-blue-800" %>
              </td>
              <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                <%= range.source.gsub('bot_import_', '').titleize %>
              </td>
              <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                <%= range.company || 'Unknown' %>
              </td>
              <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                <%= range.country || 'Unknown' %>
              </td>
              <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                <%= range.created_at.strftime('%Y-%m-%d %H:%M') %>
              </td>
              <td class="px-6 py-4 text-sm text-gray-500">
                <% if range.additional_data.present? %>
                  <% data = JSON.parse(range.additional_data) rescue {} %>
                  <div class="flex flex-wrap gap-1">
                    <% if data['crawler_type'] %>
                      <span class="px-2 py-1 text-xs font-medium rounded bg-blue-100 text-blue-800">
                        <%= data['crawler_type'].titleize %>
                      </span>
                    <% end %>
                    <% if data['crawler_purpose'] %>
                      <span class="px-2 py-1 text-xs font-medium rounded bg-purple-100 text-purple-800" title="<%= data['crawler_purpose'] %>">
                        Purpose
                      </span>
                    <% end %>
                    <% if data['aws_service'] %>
                      <span class="px-2 py-1 text-xs font-medium rounded bg-orange-100 text-orange-800">
                        <%= data['aws_service'] %>
                      </span>
                    <% end %>
                    <% if data['aws_region'] %>
                      <span class="px-2 py-1 text-xs font-medium rounded bg-green-100 text-green-800">
                        <%= data['aws_region'] %>
                      </span>
                    <% end %>
                    <% if data['ip_version'] %>
                      <span class="px-2 py-1 text-xs font-medium rounded bg-gray-100 text-gray-800">
                        IPv<%= data['ip_version'] %>
                      </span>
                    <% end %>
                  </div>
                <% end %>
              </td>
            </tr>
          <% end %>
        </tbody>
      </table>
    </div>

    <!-- Pagination -->
    <% if @network_ranges.total_pages > 1 %>
      <div class="px-6 py-4 border-t border-gray-200">
        <div class="flex items-center justify-between">
          <div class="text-sm text-gray-700">
            Page <%= @network_ranges.current_page %> of <%= @network_ranges.total_pages %>
          </div>
          <div class="flex space-x-2">
            <% if @network_ranges.prev_page %>
              <%= link_to "Previous", bot_network_range_path(params[:source], page: @network_ranges.prev_page),
                  class: "px-3 py-1 text-sm font-medium text-gray-700 bg-white border border-gray-300 rounded hover:bg-gray-50" %>
            <% end %>

            <%# Show page numbers %>
            <% (1..@network_ranges.total_pages).select { |p| p == 1 || p == @network_ranges.total_pages || (p - @network_ranges.current_page).abs <= 2 }.each do |page| %>
              <% if page == @network_ranges.current_page %>
                <span class="px-3 py-1 text-sm font-medium text-white bg-blue-600 rounded">
                  <%= page %>
                </span>
              <% else %>
                <%= link_to page, bot_network_range_path(params[:source], page: page),
                    class: "px-3 py-1 text-sm font-medium text-gray-700 bg-white border border-gray-300 rounded hover:bg-gray-50" %>
              <% end %>
            <% end %>

            <% if @network_ranges.next_page %>
              <%= link_to "Next", bot_network_range_path(params[:source], page: @network_ranges.next_page),
                  class: "px-3 py-1 text-sm font-medium text-gray-700 bg-white border border-gray-300 rounded hover:bg-gray-50" %>
            <% end %>
          </div>
        </div>
      </div>
    <% end %>
  </div>

  <% if @network_ranges.empty? %>
    <div class="bg-white shadow rounded-lg">
      <div class="px-6 py-12 text-center">
        <div class="text-gray-400 mb-4">
          <svg class="mx-auto h-12 w-12 text-gray-400" fill="none" viewBox="0 0 24 24" stroke="currentColor">
            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z" />
          </svg>
        </div>
        <h3 class="text-lg font-medium text-gray-900 mb-2">No network ranges found</h3>
        <p class="text-gray-600 mb-6">
          No <%= @source_name %> network ranges have been imported yet.
        </p>
        <%= link_to "Import #{@source_name} Ranges", bot_network_ranges_path,
            class: "inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md shadow-sm text-white bg-blue-600 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500" %>
      </div>
    </div>
  <% end %>
</div>
@@ -25,7 +25,7 @@
        <div>
          <%= form.label :waf_action, "Action", class: "block text-sm font-medium text-gray-700" %>
          <%= form.select :waf_action,
-             options_for_select([['All', ''], ['Allow', 'allow'], ['Deny', 'deny'], ['Redirect', 'redirect'], ['Challenge', 'challenge']], params[:waf_action]),
+             options_for_select([['All', ''], ['Allow', 'allow'], ['Deny', 'deny'], ['Redirect', 'redirect'], ['Challenge', 'challenge'], ['Add Header', 'add_header']], params[:waf_action]),
              { }, { class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm" } %>
        </div>
        <div>
@@ -178,9 +178,10 @@
                 when 'deny' then 'bg-red-100 text-red-800'
                 when 'redirect' then 'bg-blue-100 text-blue-800'
                 when 'challenge' then 'bg-yellow-100 text-yellow-800'
+                when 'add_header' then 'bg-purple-100 text-purple-800'
                 else 'bg-gray-100 text-gray-800'
                 end %>">
-             <%= event.waf_action %>
+             <%= event.waf_action.humanize %>
            </span>
          </td>
          <td class="px-6 py-4 text-sm font-mono text-gray-900">
@@ -75,6 +75,8 @@
            class: nav_link_class(network_ranges_path) %>

        <% if user_signed_in? && current_user_admin? %>
+         <%= link_to "🤖 Bot Ranges", bot_network_ranges_path,
+             class: nav_link_class(bot_network_ranges_path) %>
          <%= link_to "📊 Data Imports", data_imports_path,
              class: nav_link_class(data_imports_path) %>
          <%= link_to "🔗 DSNs", dsns_path,
@@ -172,6 +174,8 @@
            class: mobile_nav_link_class(network_ranges_path) %>

        <% if user_signed_in? && current_user_admin? %>
+         <%= link_to "🤖 Bot Ranges", bot_network_ranges_path,
+             class: mobile_nav_link_class(bot_network_ranges_path) %>
          <%= link_to "📊 Data Imports", data_imports_path,
              class: mobile_nav_link_class(data_imports_path) %>
          <%= link_to "🔗 DSNs", dsns_path,
@@ -225,14 +225,16 @@
              <td class="px-6 py-4 whitespace-nowrap text-right text-sm font-medium">
                <%= link_to "View", rule_path(rule), class: "text-blue-600 hover:text-blue-900 mr-3" %>
                <% if rule.enabled? %>
-                 <%= link_to "Disable", disable_rule_path(rule),
-                     method: :post,
-                     data: { confirm: "Are you sure you want to disable this rule?" },
-                     class: "text-yellow-600 hover:text-yellow-900 mr-3" %>
+                 <%= button_to "Disable", disable_rule_path(rule),
+                     method: :post,
+                     form: { style: "display: inline;" },
+                     data: { turbo_confirm: "Are you sure you want to disable this rule?" },
+                     class: "text-yellow-600 hover:text-yellow-900 mr-3 bg-transparent border-0 p-0 cursor-pointer" %>
                <% else %>
-                 <%= link_to "Enable", enable_rule_path(rule),
-                     method: :post,
-                     class: "text-green-600 hover:text-green-900 mr-3" %>
+                 <%= button_to "Enable", enable_rule_path(rule),
+                     method: :post,
+                     form: { style: "display: inline;" },
+                     class: "text-green-600 hover:text-green-900 mr-3 bg-transparent border-0 p-0 cursor-pointer" %>
                <% end %>
                <%= link_to "Edit", edit_rule_path(rule), class: "text-indigo-600 hover:text-indigo-900" %>
              </td>
@@ -6,7 +6,7 @@
    <p class="mt-2 text-gray-600">Create a WAF rule to allow, block, or rate limit traffic</p>
  </div>

- <div class="bg-white shadow rounded-lg">
+ <div class="bg-white shadow rounded-lg" data-controller="rule-form">
    <%= form_with(model: @rule, local: true, class: "space-y-6") do |form| %>
      <% if @rule.errors.any? %>
        <div class="rounded-md bg-red-50 p-4">
@@ -54,7 +54,8 @@
            <%= form.select :waf_action,
                options_for_select(@waf_actions.map { |action, _| [action.humanize, action] }, @rule.waf_action),
                { prompt: "Select action" },
-               { class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm" } %>
+               { class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm",
+                 data: { rule_form_target: "actionSelect", action: "change->rule-form#updateActionSections" } } %>
            <p class="mt-2 text-sm text-gray-500">What action to take when this rule matches</p>
          </div>
        </div>
@@ -158,6 +159,27 @@
          </div>
        </div>
+
+       <!-- Add Header Fields (shown for add_header action) -->
+       <div id="add_header_section" class="hidden space-y-4" data-rule-form-target="addHeaderSection">
+         <div>
+           <%= label_tag :header_name, "Header Name", class: "block text-sm font-medium text-gray-700" %>
+           <%= text_field_tag :header_name, "",
+               class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm",
+               placeholder: "X-Bot-Agent",
+               id: "header_name_input" %>
+           <p class="mt-2 text-sm text-gray-500">The HTTP header name to add (e.g., X-Bot-Agent, X-Network-Type)</p>
+         </div>
+
+         <div>
+           <%= label_tag :header_value, "Header Value", class: "block text-sm font-medium text-gray-700" %>
+           <%= text_field_tag :header_value, "",
+               class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm",
+               placeholder: "BingBot",
+               id: "header_value_input" %>
+           <p class="mt-2 text-sm text-gray-500">The value for the header (e.g., BingBot, GoogleBot, Unknown)</p>
+         </div>
+       </div>
+
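The controller test added later in this diff asserts that these two loose fields end up inside rule.metadata; a plausible controller-side merge, shown as a sketch only since the real handling lives in RulesController:

  # Sketch (assumed, not the committed controller code): fold the loose
  # header_name/header_value params into the rule's metadata JSON.
  if rule_params[:waf_action] == "add_header"
    meta = JSON.parse(rule_params[:metadata].presence || "{}")
    meta["header_name"]  = params[:header_name]
    meta["header_value"] = params[:header_value]
    @rule.metadata = meta
  end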
        <!-- Metadata -->
        <div data-controller="json-validator" data-json-validator-valid-class="json-valid" data-json-validator-invalid-class="json-invalid" data-json-validator-valid-status-class="json-valid-status" data-json-validator-invalid-status-class="json-invalid-status">
          <%= form.label :metadata, "Metadata", class: "block text-sm font-medium text-gray-700" %>
@@ -197,10 +219,18 @@
        </div>

        <div>
-         <%= form.label :expires_at, "Expires At", class: "block text-sm font-medium text-gray-700" %>
-         <%= form.datetime_local_field :expires_at,
-             class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm" %>
-         <p class="mt-2 text-sm text-gray-500">Leave blank for permanent rule</p>
+         <div class="flex items-center mb-2">
+           <%= check_box_tag :set_expiration, "1", false,
+               class: "h-4 w-4 rounded border-gray-300 text-blue-600 focus:ring-blue-500",
+               data: { rule_form_target: "expirationCheckbox", action: "change->rule-form#toggleExpiration" } %>
+           <%= label_tag :set_expiration, "Set expiration", class: "ml-2 block text-sm font-medium text-gray-700" %>
+         </div>
+         <div class="hidden" data-rule-form-target="expirationField">
+           <%= form.label :expires_at, "Expires At", class: "block text-sm font-medium text-gray-700" %>
+           <%= form.datetime_local_field :expires_at,
+               class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm" %>
+           <p class="mt-2 text-sm text-gray-500">When this rule should automatically expire</p>
+         </div>
        </div>

        <div class="flex items-center pt-6">
@@ -39,12 +39,12 @@
     <div class="flex space-x-3">
       <%= link_to "Edit", edit_rule_path(@rule), class: "inline-flex items-center px-4 py-2 border border-gray-300 rounded-md shadow-sm text-sm font-medium text-gray-700 bg-white hover:bg-gray-50" %>
       <% if @rule.enabled? %>
-        <%= link_to "Disable", disable_rule_path(@rule),
+        <%= button_to "Disable", disable_rule_path(@rule),
            method: :post,
-           data: { confirm: "Are you sure you want to disable this rule?" },
+           data: { turbo_confirm: "Are you sure you want to disable this rule?" },
            class: "inline-flex items-center px-4 py-2 border border-yellow-300 rounded-md shadow-sm text-sm font-medium text-yellow-700 bg-yellow-50 hover:bg-yellow-100" %>
       <% else %>
-        <%= link_to "Enable", enable_rule_path(@rule),
+        <%= button_to "Enable", enable_rule_path(@rule),
            method: :post,
            class: "inline-flex items-center px-4 py-2 border border-green-300 rounded-md shadow-sm text-sm font-medium text-green-700 bg-green-50 hover:bg-green-100" %>
       <% end %>
@@ -29,3 +29,9 @@ cleanup_old_events:
   class: CleanupOldEventsJob
   queue: background
   schedule: every hour
+
+# Sync events from PostgreSQL to DuckDB for fast analytics
+sync_events_to_duckdb:
+  class: SyncEventsToDuckdbJob
+  queue: default
+  schedule: every minute
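A minimal shape for the job this schedule points at, sketched from the naming alone; the committed SyncEventsToDuckdbJob lives elsewhere in this branch:

  # Sketch only: the real job is defined elsewhere in this branch.
  class SyncEventsToDuckdbJob < ApplicationJob
    queue_as :default

    def perform
      # e.g. copy Event rows created since the last sync into DuckDB,
      # so analytics queries can run against the columnar store
    end
  end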
@@ -93,4 +93,13 @@ Rails.application.routes.draw do
      get :progress
    end
  end
+
+ # Bot network range management (admin only)
+ resources :bot_network_ranges, only: [:index, :show, :destroy] do
+   collection do
+     post :import
+     post :import_async
+     post :import_all
+   end
+ end
 end
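The original block declared only: [:index, :show] plus a member `delete :destroy`, which generates /bot_network_ranges/:id/destroy; the "Delete All Ranges" form in the show view submits DELETE to bot_network_range_path, so :destroy is included above instead. For reference, the helpers the corrected block generates (abbreviated rails routes output):

  # bot_network_ranges_path              GET    /bot_network_ranges
  # import_bot_network_ranges_path       POST   /bot_network_ranges/import
  # import_async_bot_network_ranges_path POST   /bot_network_ranges/import_async
  # import_all_bot_network_ranges_path   POST   /bot_network_ranges/import_all
  # bot_network_range_path               GET    /bot_network_ranges/:id
  #                                      DELETE /bot_network_ranges/:id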
@@ -437,7 +437,7 @@ Ipv4Range.upsert({
   network_start: cidr.to_i,
   network_end: cidr.to_range.end.to_i,
   network_prefix: 8,
-  waf_action: 1, # deny
+  waf_action: 0, # deny
   priority: 8
 }, unique_by: :source)

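The 1 → 0 flip here tracks a reordering of the waf_action enum so that deny maps to 0, which the event model test below also asserts; a declaration consistent with both changes might look like the following, though the ordering of the remaining values is an assumption:

  # Assumed shape only; deny => 0 is what this seed and the test below rely on.
  enum :waf_action, { deny: 0, allow: 1, redirect: 2, challenge: 3, add_header: 4 }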
65	test/controllers/rules_controller_test.rb	Normal file
@@ -0,0 +1,65 @@
require "test_helper"

class RulesControllerTest < ActionDispatch::IntegrationTest
  setup do
    @user = users(:one)
    sign_in_as(@user)
  end

  test "should create network rule with add_header action" do
    assert_difference('Rule.count') do
      post rules_path, params: {
        rule: {
          waf_rule_type: "network",
          waf_action: "add_header",
          network_range_id: "",
          conditions: "{}",
          metadata: "{}",
          source: "manual",
          expires_at: "",
          enabled: "1"
        },
        new_cidr: "52.167.145.0/24",
        path_pattern: "",
        match_type: "exact",
        header_name: "X-Bot-Agent",
        header_value: "Blah"
      }
    end

    rule = Rule.last
    assert_equal "network", rule.waf_rule_type
    assert_equal "add_header", rule.waf_action, "waf_action should be 'add_header' but was #{rule.waf_action.inspect}"
    assert_equal "X-Bot-Agent", rule.metadata["header_name"]
    assert_equal "Blah", rule.metadata["header_value"]
    assert_not_nil rule.network_range
    # Network range stores as /32 if no prefix given
    assert_match(/52\.167\.145\./, rule.network_range.network.to_s)

    # Verify metadata JSON doesn't have duplicate keys
    metadata_json = rule.metadata.to_json
    refute_includes metadata_json, '"header_name":"X-Bot-Agent","header_value":"Blah","reason":"{}","header_name"',
      "Metadata should not have duplicate keys"
  end

  test "should create rule with waf_action properly set from string parameter" do
    assert_difference('Rule.count') do
      post rules_path, params: {
        rule: {
          waf_rule_type: "network",
          waf_action: "deny", # Test with different action
          network_range_id: "",
          conditions: "{}",
          metadata: '{"reason": "test"}',
          source: "manual",
          enabled: "1"
        },
        new_cidr: "10.0.0.1/32"
      }
    end

    rule = Rule.last
    assert_equal "deny", rule.waf_action, "waf_action should be 'deny'"
    assert_equal "network", rule.waf_rule_type
  end
end
66	test/fixtures/files/ipapi_91_84_96_0.json	vendored	Normal file
@@ -0,0 +1,66 @@
{
  "ip": "91.84.96.0",
  "rir": "RIPE",
  "is_bogon": false,
  "is_mobile": false,
  "is_satellite": false,
  "is_crawler": false,
  "is_datacenter": true,
  "is_tor": false,
  "is_proxy": false,
  "is_vpn": false,
  "is_abuser": false,
  "datacenter": {
    "datacenter": "SERVERS TECH FZCO",
    "domain": "vdsina.com",
    "network": "91.84.96.0 - 91.84.127.255"
  },
  "company": {
    "name": "SERVERS TECH FZCO",
    "abuser_score": "0.0162 (Elevated)",
    "domain": "vdsina.com",
    "type": "hosting",
    "network": "91.84.96.0 - 91.84.127.255",
    "whois": "https://api.ipapi.is/?whois=91.84.96.0"
  },
  "abuse": {
    "name": "SERVERS TECH FZCO",
    "address": "UNITED ARAB EMIRATES, Dubai, 336469, Ifza Business Park DDP, Building 1, office number 36298-001",
    "email": "abuse@vdsina.com",
    "phone": "+971525386329"
  },
  "asn": {
    "asn": 216071,
    "abuser_score": "0.0181 (Elevated)",
    "route": "91.84.96.0/24",
    "descr": "VDSINA, AE",
    "country": "ae",
    "active": true,
    "org": "SERVERS TECH FZCO",
    "domain": "vdsina.com",
    "abuse": "abuse@vdsina.com",
    "type": "hosting",
    "created": "2023-10-30",
    "updated": "2023-10-30",
    "rir": "RIPE",
    "whois": "https://api.ipapi.is/?whois=AS216071"
  },
  "location": {
    "is_eu_member": true,
    "calling_code": "31",
    "currency_code": "EUR",
    "continent": "EU",
    "country": "The Netherlands",
    "country_code": "NL",
    "state": "North Holland",
    "city": "Amsterdam",
    "latitude": 52.37403,
    "longitude": 4.88969,
    "zip": "1384",
    "timezone": "Europe/Brussels",
    "local_time": "2025-11-17T22:21:06+01:00",
    "local_time_unix": 1763414466,
    "is_dst": false
  },
  "elapsed_ms": 0.5
}
195	test/jobs/cleanup_old_events_job_test.rb	Normal file
@@ -0,0 +1,195 @@
# frozen_string_literal: true

require "test_helper"

class CleanupOldEventsJobTest < ActiveJob::TestCase
  setup do
    # Clear any existing events
    Event.delete_all
    # Set default retention to 90 days
    Setting.set('event_retention_days', '90')
  end

  test "deletes events older than retention period" do
    # Create old event (100 days ago - should be deleted)
    old_event = Event.create!(
      request_id: "old-request-#{SecureRandom.uuid}",
      timestamp: 100.days.ago,
      ip_address: "1.2.3.4",
      payload: { request: { ip: "1.2.3.4" } }
    )

    # Create recent event (30 days ago - should be kept)
    recent_event = Event.create!(
      request_id: "recent-request-#{SecureRandom.uuid}",
      timestamp: 30.days.ago,
      ip_address: "5.6.7.8",
      payload: { request: { ip: "5.6.7.8" } }
    )

    count = CleanupOldEventsJob.perform_now

    assert_equal 1, count
    assert_raises(ActiveRecord::RecordNotFound) { old_event.reload }
    assert_nothing_raised { recent_event.reload }
  end

  test "respects custom retention period" do
    # Set retention to 30 days
    Setting.set('event_retention_days', '30')

    # Create event that's 40 days old (should be deleted with 30-day retention)
    old_event = Event.create!(
      request_id: "old-request-#{SecureRandom.uuid}",
      timestamp: 40.days.ago,
      ip_address: "1.2.3.4",
      payload: { request: { ip: "1.2.3.4" } }
    )

    # Create event that's 20 days old (should be kept)
    recent_event = Event.create!(
      request_id: "recent-request-#{SecureRandom.uuid}",
      timestamp: 20.days.ago,
      ip_address: "5.6.7.8",
      payload: { request: { ip: "5.6.7.8" } }
    )

    count = CleanupOldEventsJob.perform_now

    assert_equal 1, count
    assert_raises(ActiveRecord::RecordNotFound) { old_event.reload }
    assert_nothing_raised { recent_event.reload }
  end

  test "does not delete when retention is zero" do
    Setting.set('event_retention_days', '0')

    old_event = Event.create!(
      request_id: "old-request-#{SecureRandom.uuid}",
      timestamp: 100.days.ago,
      ip_address: "1.2.3.4",
      payload: { request: { ip: "1.2.3.4" } }
    )

    count = CleanupOldEventsJob.perform_now

    assert_equal 0, count
    assert_nothing_raised { old_event.reload }
  end

  test "does not delete when retention is negative" do
    Setting.set('event_retention_days', '-1')

    old_event = Event.create!(
      request_id: "old-request-#{SecureRandom.uuid}",
      timestamp: 100.days.ago,
      ip_address: "1.2.3.4",
      payload: { request: { ip: "1.2.3.4" } }
    )

    count = CleanupOldEventsJob.perform_now

    assert_equal 0, count
    assert_nothing_raised { old_event.reload }
  end

  test "returns zero when no old events exist" do
    # Create only recent events
    Event.create!(
      request_id: "recent-request-#{SecureRandom.uuid}",
      timestamp: 30.days.ago,
      ip_address: "1.2.3.4",
      payload: { request: { ip: "1.2.3.4" } }
    )

    count = CleanupOldEventsJob.perform_now

    assert_equal 0, count
  end

  test "returns zero when no events exist" do
    count = CleanupOldEventsJob.perform_now

    assert_equal 0, count
  end

  test "deletes multiple old events" do
    # Create 5 old events
    5.times do |i|
      Event.create!(
        request_id: "old-request-#{i}-#{SecureRandom.uuid}",
        timestamp: 100.days.ago,
        ip_address: "1.2.3.#{i}",
        payload: { request: { ip: "1.2.3.#{i}" } }
      )
    end

    # Create 3 recent events
    3.times do |i|
      Event.create!(
        request_id: "recent-request-#{i}-#{SecureRandom.uuid}",
        timestamp: 30.days.ago,
        ip_address: "5.6.7.#{i}",
        payload: { request: { ip: "5.6.7.#{i}" } }
      )
    end

    count = CleanupOldEventsJob.perform_now

    assert_equal 5, count
    assert_equal 3, Event.count
  end

  test "uses default retention when setting not configured" do
    # Remove the setting
    Setting.find_by(key: 'event_retention_days')&.destroy

    # Create event that's 100 days old (should be deleted with default 90-day retention)
    old_event = Event.create!(
      request_id: "old-request-#{SecureRandom.uuid}",
      timestamp: 100.days.ago,
      ip_address: "1.2.3.4",
      payload: { request: { ip: "1.2.3.4" } }
    )

    # Create event that's 80 days old (should be kept with default 90-day retention)
    recent_event = Event.create!(
      request_id: "recent-request-#{SecureRandom.uuid}",
      timestamp: 80.days.ago,
      ip_address: "5.6.7.8",
      payload: { request: { ip: "5.6.7.8" } }
    )

    count = CleanupOldEventsJob.perform_now

    assert_equal 1, count
    assert_raises(ActiveRecord::RecordNotFound) { old_event.reload }
    assert_nothing_raised { recent_event.reload }
  end

  test "handles events at exact cutoff boundary correctly" do
    Setting.set('event_retention_days', '90')

    # This event's 90.days.ago timestamp is computed before the job computes
    # its own cutoff, so it is fractionally older and the strict < deletes it
    cutoff_event = Event.create!(
      request_id: "cutoff-request-#{SecureRandom.uuid}",
      timestamp: 90.days.ago,
      ip_address: "1.2.3.4",
      payload: { request: { ip: "1.2.3.4" } }
    )

    # Create event just inside cutoff (should be kept)
    inside_event = Event.create!(
      request_id: "inside-request-#{SecureRandom.uuid}",
      timestamp: 89.days.ago,
      ip_address: "5.6.7.8",
      payload: { request: { ip: "5.6.7.8" } }
    )

    count = CleanupOldEventsJob.perform_now

    assert_equal 1, count
    assert_raises(ActiveRecord::RecordNotFound) { cutoff_event.reload }
    assert_nothing_raised { inside_event.reload }
  end
end
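Taken together, the cases above pin down the job's contract; a minimal implementation consistent with every assertion in this file (Setting.get is assumed as the reader that pairs with Setting.set):

  # Sketch consistent with the tests above, not the committed job:
  class CleanupOldEventsJob < ApplicationJob
    DEFAULT_RETENTION_DAYS = 90

    def perform
      days = (Setting.get('event_retention_days') || DEFAULT_RETENTION_DAYS).to_i
      return 0 if days <= 0                                    # 0 or negative disables cleanup
      Event.where("timestamp < ?", days.days.ago).delete_all   # strict <, returns count
    end
  end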
@@ -228,7 +228,7 @@ class EventTest < ActiveSupport::TestCase
     assert_equal "post", event.request_method
     assert_equal "deny", event.waf_action
     assert_equal 1, event.request_method_before_type_cast # POST = 1
-    assert_equal 1, event.waf_action_before_type_cast # DENY = 1
+    assert_equal 0, event.waf_action_before_type_cast # DENY = 0
   end
 
   test "payload extraction methods work correctly" do
233	test/models/rule_path_pattern_test.rb	Normal file
@@ -0,0 +1,233 @@
# frozen_string_literal: true

require "test_helper"

class RulePathPatternTest < ActiveSupport::TestCase
  setup do
    @user = User.create!(email_address: "test@example.com", password: "password123")
  end

  test "create_path_pattern_rule creates valid rule" do
    rule = Rule.create_path_pattern_rule(
      pattern: "/admin/users",
      match_type: "exact",
      action: "deny",
      user: @user
    )

    assert rule.persisted?, "Rule should be persisted"
    assert_equal "path_pattern", rule.waf_rule_type
    assert_equal "deny", rule.waf_action
    assert_equal "exact", rule.path_match_type
    assert_equal 2, rule.path_segment_ids.length
  end

  test "create_path_pattern_rule auto-creates PathSegments" do
    initial_count = PathSegment.count

    rule = Rule.create_path_pattern_rule(
      pattern: "/new/path/here",
      match_type: "prefix",
      user: @user
    )

    assert_equal initial_count + 3, PathSegment.count, "Should create 3 new segments"
    assert_equal 3, rule.path_segment_ids.length
  end

  test "create_path_pattern_rule normalizes to lowercase" do
    rule = Rule.create_path_pattern_rule(
      pattern: "/Admin/Users",
      match_type: "exact",
      user: @user
    )

    segments = rule.path_segments_text
    assert_equal ["admin", "users"], segments, "Segments should be lowercase"
  end

  test "create_path_pattern_rule reuses existing PathSegments" do
    # Create segment first
    PathSegment.find_or_create_segment("admin")
    initial_count = PathSegment.count

    rule = Rule.create_path_pattern_rule(
      pattern: "/admin",
      match_type: "exact",
      user: @user
    )

    assert_equal initial_count, PathSegment.count, "Should not create duplicate segment"
    assert_equal 1, rule.path_segment_ids.length
  end

  test "create_path_pattern_rule validates match_type" do
    assert_raises(ArgumentError, "Should raise for invalid match_type") do
      Rule.create_path_pattern_rule(
        pattern: "/admin",
        match_type: "invalid",
        user: @user
      )
    end
  end

  test "create_path_pattern_rule validates pattern not empty" do
    assert_raises(ArgumentError, "Should raise for empty pattern") do
      Rule.create_path_pattern_rule(
        pattern: "/",
        match_type: "exact",
        user: @user
      )
    end
  end

  test "validation requires segment_ids for path_pattern rules" do
    rule = Rule.new(
      waf_rule_type: "path_pattern",
      waf_action: "deny",
      conditions: { match_type: "exact" }, # Missing segment_ids
      user: @user
    )

    refute rule.valid?, "Rule should be invalid without segment_ids"
    assert_includes rule.errors[:conditions], "must include 'segment_ids' array for path_pattern rules"
  end

  test "validation requires match_type for path_pattern rules" do
    admin_seg = PathSegment.find_or_create_segment("admin")

    rule = Rule.new(
      waf_rule_type: "path_pattern",
      waf_action: "deny",
      conditions: { segment_ids: [admin_seg.id] }, # Missing match_type
      user: @user
    )

    refute rule.valid?, "Rule should be invalid without match_type"
    assert_includes rule.errors[:conditions], "match_type must be one of: exact, prefix, suffix, contains"
  end

  test "validation checks match_type is valid" do
    admin_seg = PathSegment.find_or_create_segment("admin")

    rule = Rule.new(
      waf_rule_type: "path_pattern",
      waf_action: "deny",
      conditions: { segment_ids: [admin_seg.id], match_type: "invalid" },
      user: @user
    )

    refute rule.valid?, "Rule should be invalid with invalid match_type"
    assert_includes rule.errors[:conditions], "match_type must be one of: exact, prefix, suffix, contains"
  end

  test "validation checks segment IDs exist" do
    rule = Rule.new(
      waf_rule_type: "path_pattern",
      waf_action: "deny",
      conditions: { segment_ids: [99999], match_type: "exact" }, # Non-existent ID
      user: @user
    )

    refute rule.valid?, "Rule should be invalid with non-existent segment IDs"
    assert_match(/non-existent path segment IDs/, rule.errors[:conditions].first)
  end
|
||||||
|
|
||||||
|
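
The three validation tests above fix the error messages but not the validator itself. A minimal sketch of a conditions validator that would satisfy them, assuming a validate_path_pattern_conditions callback on the Rule model (the method name and hash access are illustrative, not taken from the actual model):

  VALID_MATCH_TYPES = %w[exact prefix suffix contains].freeze

  # Hypothetical validator reconstructed from the assertions above
  def validate_path_pattern_conditions
    return unless waf_rule_type == "path_pattern"

    conds = (conditions || {}).with_indifferent_access
    ids = conds[:segment_ids]
    unless ids.is_a?(Array) && ids.any?
      errors.add(:conditions, "must include 'segment_ids' array for path_pattern rules")
      return
    end

    unless VALID_MATCH_TYPES.include?(conds[:match_type])
      errors.add(:conditions, "match_type must be one of: exact, prefix, suffix, contains")
    end

    # Reject IDs that do not resolve to persisted PathSegment rows
    missing = ids - PathSegment.where(id: ids).pluck(:id)
    if missing.any?
      errors.add(:conditions, "references non-existent path segment IDs: #{missing.join(', ')}")
    end
  end
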
test "path_pattern_display returns human-readable path" do
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/admin/users",
|
||||||
|
match_type: "exact",
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
assert_equal "/admin/users", rule.path_pattern_display
|
||||||
|
end
|
||||||
|
|
||||||
|
test "path_segments_text returns segment text array" do
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/api/v1/users",
|
||||||
|
match_type: "prefix",
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
assert_equal ["api", "v1", "users"], rule.path_segments_text
|
||||||
|
end
|
||||||
|
|
||||||
|
test "to_agent_format includes segment_ids and match_type for path rules" do
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/admin",
|
||||||
|
match_type: "prefix",
|
||||||
|
action: "deny",
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
agent_format = rule.to_agent_format
|
||||||
|
|
||||||
|
assert_equal "path_pattern", agent_format[:waf_rule_type]
|
||||||
|
assert_equal "deny", agent_format[:waf_action]
|
||||||
|
assert agent_format[:conditions].key?(:segment_ids), "Should include segment_ids"
|
||||||
|
assert_equal "prefix", agent_format[:conditions][:match_type]
|
||||||
|
assert_kind_of Array, agent_format[:conditions][:segment_ids]
|
||||||
|
end
|
||||||
|
|
||||||
|
test "supports all four match types" do
|
||||||
|
%w[exact prefix suffix contains].each do |match_type|
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/admin",
|
||||||
|
match_type: match_type,
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
assert rule.persisted?, "Should create rule with #{match_type} match type"
|
||||||
|
assert_equal match_type, rule.path_match_type
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
test "supports all action types" do
|
||||||
|
%w[allow deny challenge].each do |action|
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/admin",
|
||||||
|
match_type: "exact",
|
||||||
|
action: action,
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
assert rule.persisted?, "Should create rule with #{action} action"
|
||||||
|
assert_equal action, rule.waf_action
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
test "supports redirect action with metadata" do
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/admin",
|
||||||
|
match_type: "exact",
|
||||||
|
action: "redirect",
|
||||||
|
user: @user,
|
||||||
|
metadata: { redirect_url: "https://example.com" }
|
||||||
|
)
|
||||||
|
|
||||||
|
assert rule.persisted?, "Should create rule with redirect action"
|
||||||
|
assert_equal "redirect", rule.waf_action
|
||||||
|
end
|
||||||
|
|
||||||
|
test "stores metadata with human-readable segments" do
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/admin/users",
|
||||||
|
match_type: "exact",
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
assert_equal ["admin", "users"], rule.metadata["segments"]
|
||||||
|
assert_equal "/admin/users", rule.metadata["pattern_display"]
|
||||||
|
end
|
||||||
|
|
||||||
|
test "stores original pattern in conditions" do
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/Admin/Users", # Mixed case
|
||||||
|
match_type: "exact",
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
assert_equal "/Admin/Users", rule.conditions["original_pattern"]
|
||||||
|
end
|
||||||
|
end
|
||||||
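
For reference while reading these tests, the factory they exercise could look roughly like the following. This is a sketch reconstructed from the assertions; the default action of "deny" and the exact conditions/metadata layout are assumptions, not the model's real code:

  def self.create_path_pattern_rule(pattern:, match_type:, user:, action: "deny", metadata: {})
    raise ArgumentError, "invalid match_type" unless %w[exact prefix suffix contains].include?(match_type)

    # Normalize to lowercase and split into non-empty segments
    segments = pattern.downcase.split("/").reject(&:empty?)
    raise ArgumentError, "pattern must contain at least one segment" if segments.empty?

    # Reuse existing PathSegments, creating only the missing ones
    segment_ids = segments.map { |text| PathSegment.find_or_create_segment(text).id }

    create!(
      user: user,
      waf_rule_type: "path_pattern",
      waf_action: action,
      conditions: {
        segment_ids: segment_ids,
        match_type: match_type,
        original_pattern: pattern
      },
      metadata: metadata.merge(
        segments: segments,
        pattern_display: "/#{segments.join('/')}"
      )
    )
  end
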
134 test/services/ipapi_test.rb Normal file
@@ -0,0 +1,134 @@
require "test_helper"

class IpapiTest < ActiveSupport::TestCase
  def setup
    @ipapi_data = JSON.parse(
      File.read(Rails.root.join("test/fixtures/files/ipapi_91_84_96_0.json"))
    )
  end

  test "parse_company_network_range extracts and converts IP range to CIDR" do
    cidr = Ipapi.parse_company_network_range(@ipapi_data)

    assert_equal "91.84.96.0/19", cidr
  end

  test "parse_company_network_range handles already formatted CIDR" do
    data = { "company" => { "network" => "1.2.3.0/24" } }
    cidr = Ipapi.parse_company_network_range(data)

    assert_equal "1.2.3.0/24", cidr
  end

  test "parse_company_network_range returns nil for invalid range" do
    data = { "company" => { "network" => "invalid" } }
    cidr = Ipapi.parse_company_network_range(data)

    assert_nil cidr
  end

  test "parse_company_network_range returns nil when no network data present" do
    data = { "company" => {} }
    cidr = Ipapi.parse_company_network_range(data)

    assert_nil cidr
  end

  test "parse_company_network_range falls back to datacenter.network" do
    data = { "datacenter" => { "network" => "1.2.3.0 - 1.2.3.255" } }
    cidr = Ipapi.parse_company_network_range(data)

    assert_equal "1.2.3.0/24", cidr
  end

  test "populate_network_attributes sets all network attributes" do
    network_range = NetworkRange.new(network: "91.84.96.0/24")
    Ipapi.populate_network_attributes(network_range, @ipapi_data)

    assert_equal 216071, network_range.asn
    assert_equal "SERVERS TECH FZCO", network_range.asn_org
    assert_equal "SERVERS TECH FZCO", network_range.company
    assert_equal "NL", network_range.country
    assert network_range.is_datacenter
    refute network_range.is_vpn
    refute network_range.is_proxy
  end

  test "process_ipapi_data creates both company and BGP route networks" do
    # Use a different tracking network so BGP route gets created
    tracking_network = NetworkRange.create!(
      network: "91.84.97.0/24",
      source: "auto_generated"
    )

    assert_difference "NetworkRange.count", 2 do
      result = Ipapi.process_ipapi_data(@ipapi_data, tracking_network)

      assert_equal 2, result[:networks].length
      assert_equal "91.84.96.0/19", result[:broadest_cidr]
    end

    # Verify company network was created
    company_network = NetworkRange.find_by(network: "91.84.96.0/19")
    assert_not_nil company_network
    assert_equal "api_imported", company_network.source
    assert_equal "SERVERS TECH FZCO", company_network.company
    assert company_network.is_datacenter

    # Verify BGP route network was created
    bgp_network = NetworkRange.find_by(network: "91.84.96.0/24")
    assert_not_nil bgp_network
    assert_equal "SERVERS TECH FZCO", bgp_network.company
  end

  test "process_ipapi_data handles missing company network gracefully" do
    # Create data without company network range
    data = @ipapi_data.deep_dup
    data["company"].delete("network")
    data["datacenter"].delete("network")

    tracking_network = NetworkRange.create!(
      network: "91.84.96.0/24",
      source: "auto_generated"
    )

    # Should only create the BGP route network (which matches tracking, so 0 new)
    assert_no_difference "NetworkRange.count" do
      result = Ipapi.process_ipapi_data(data, tracking_network)

      assert_equal 0, result[:networks].length
      assert_equal "91.84.96.0/24", result[:broadest_cidr]
    end
  end

  test "process_ipapi_data updates existing networks instead of creating duplicates" do
    # Pre-create both networks
    company_network = NetworkRange.create!(
      network: "91.84.96.0/19",
      source: "manual",
      company: "Old Company"
    )

    bgp_network = NetworkRange.create!(
      network: "91.84.96.0/24",
      source: "manual"
    )

    tracking_network = NetworkRange.create!(
      network: "91.84.97.0/24",
      source: "auto_generated"
    )

    # Should not create new networks, just update existing ones
    assert_no_difference "NetworkRange.count" do
      result = Ipapi.process_ipapi_data(@ipapi_data, tracking_network)

      assert_equal 2, result[:networks].length
    end

    # Verify updates
    company_network.reload
    assert_equal "SERVERS TECH FZCO", company_network.company
    assert company_network.network_data.key?("ipapi")
  end
end
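
The range parsing covered above turns ipapi's "first - last" network strings into CIDR notation. A sketch of that conversion using Ruby's stdlib IPAddr, assuming IPv4 and a range that covers a power-of-two block aligned on its start address (the helper's shape is illustrative, not the service's actual code):

  require "ipaddr"

  def self.parse_company_network_range(data)
    raw = data.dig("company", "network") || data.dig("datacenter", "network")
    return nil if raw.nil?
    return raw if raw.include?("/") # already CIDR

    first, last = raw.split("-").map(&:strip)
    return nil if first.nil? || last.nil?

    size = IPAddr.new(last).to_i - IPAddr.new(first).to_i + 1
    # Only exact power-of-two ranges map cleanly onto a single prefix
    return nil unless size.positive? && (size & (size - 1)).zero?

    "#{first}/#{32 - Math.log2(size).to_i}"
  rescue IPAddr::InvalidAddressError
    nil
  end

With this shape, "1.2.3.0 - 1.2.3.255" covers 256 addresses, so the prefix is 32 - 8 = 24, matching the "/24" the tests expect.
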
216 test/services/path_rule_matcher_test.rb Normal file
@@ -0,0 +1,216 @@
# frozen_string_literal: true

require "test_helper"

class PathRuleMatcherTest < ActiveSupport::TestCase
  setup do
    @user = User.create!(email_address: "test@example.com", password: "password123")

    # Create path segments
    @admin_segment = PathSegment.find_or_create_segment("admin")
    @wp_login_segment = PathSegment.find_or_create_segment("wp-login.php")
    @api_segment = PathSegment.find_or_create_segment("api")
    @v1_segment = PathSegment.find_or_create_segment("v1")
    @users_segment = PathSegment.find_or_create_segment("users")
    @dashboard_segment = PathSegment.find_or_create_segment("dashboard")
  end

  test "exact match - matches exact path only" do
    rule = Rule.create_path_pattern_rule(
      pattern: "/wp-login.php",
      match_type: "exact",
      action: "deny",
      user: @user
    )

    # Create matching event
    matching_event = create_event_with_segments([@wp_login_segment.id])
    assert PathRuleMatcher.matches?(rule, matching_event), "Should match exact path"

    # Create non-matching event (extra segment)
    non_matching_event = create_event_with_segments([@admin_segment.id, @wp_login_segment.id])
    refute PathRuleMatcher.matches?(rule, non_matching_event), "Should not match path with extra segments"
  end

  test "prefix match - matches paths starting with pattern" do
    rule = Rule.create_path_pattern_rule(
      pattern: "/admin",
      match_type: "prefix",
      action: "deny",
      user: @user
    )

    # Should match /admin
    event1 = create_event_with_segments([@admin_segment.id])
    assert PathRuleMatcher.matches?(rule, event1), "Should match exact prefix"

    # Should match /admin/dashboard
    event2 = create_event_with_segments([@admin_segment.id, @dashboard_segment.id])
    assert PathRuleMatcher.matches?(rule, event2), "Should match prefix with additional segments"

    # Should match /admin/users/123
    event3 = create_event_with_segments([@admin_segment.id, @users_segment.id, create_segment("123").id])
    assert PathRuleMatcher.matches?(rule, event3), "Should match prefix with multiple additional segments"

    # Should NOT match /api/admin (admin not at start)
    event4 = create_event_with_segments([@api_segment.id, @admin_segment.id])
    refute PathRuleMatcher.matches?(rule, event4), "Should not match when pattern not at start"
  end

  test "suffix match - matches paths ending with pattern" do
    rule = Rule.create_path_pattern_rule(
      pattern: "/wp-login.php",
      match_type: "suffix",
      action: "deny",
      user: @user
    )

    # Should match /wp-login.php
    event1 = create_event_with_segments([@wp_login_segment.id])
    assert PathRuleMatcher.matches?(rule, event1), "Should match exact suffix"

    # Should match /admin/wp-login.php
    event2 = create_event_with_segments([@admin_segment.id, @wp_login_segment.id])
    assert PathRuleMatcher.matches?(rule, event2), "Should match suffix with prefix segments"

    # Should match /backup/admin/wp-login.php
    backup_seg = create_segment("backup")
    event3 = create_event_with_segments([backup_seg.id, @admin_segment.id, @wp_login_segment.id])
    assert PathRuleMatcher.matches?(rule, event3), "Should match suffix with multiple prefix segments"

    # Should NOT match /wp-login.php/test (suffix has extra segment)
    test_seg = create_segment("test")
    event4 = create_event_with_segments([@wp_login_segment.id, test_seg.id])
    refute PathRuleMatcher.matches?(rule, event4), "Should not match when pattern not at end"
  end

  test "contains match - matches paths containing pattern" do
    rule = Rule.create_path_pattern_rule(
      pattern: "/admin",
      match_type: "contains",
      action: "deny",
      user: @user
    )

    # Should match /admin
    event1 = create_event_with_segments([@admin_segment.id])
    assert PathRuleMatcher.matches?(rule, event1), "Should match exact contains"

    # Should match /api/admin/users
    event2 = create_event_with_segments([@api_segment.id, @admin_segment.id, @users_segment.id])
    assert PathRuleMatcher.matches?(rule, event2), "Should match contains in middle"

    # Should match /super/secret/admin/panel
    super_seg = create_segment("super")
    secret_seg = create_segment("secret")
    panel_seg = create_segment("panel")
    event3 = create_event_with_segments([super_seg.id, secret_seg.id, @admin_segment.id, panel_seg.id])
    assert PathRuleMatcher.matches?(rule, event3), "Should match contains with prefix and suffix"

    # Should NOT match /administrator (different segment)
    administrator_seg = create_segment("administrator")
    event4 = create_event_with_segments([administrator_seg.id])
    refute PathRuleMatcher.matches?(rule, event4), "Should not match different segment"
  end

  test "contains match with multi-segment pattern" do
    rule = Rule.create_path_pattern_rule(
      pattern: "/api/admin",
      match_type: "contains",
      action: "deny",
      user: @user
    )

    # Should match /api/admin
    event1 = create_event_with_segments([@api_segment.id, @admin_segment.id])
    assert PathRuleMatcher.matches?(rule, event1), "Should match exact contains"

    # Should match /v1/api/admin/users
    event2 = create_event_with_segments([@v1_segment.id, @api_segment.id, @admin_segment.id, @users_segment.id])
    assert PathRuleMatcher.matches?(rule, event2), "Should match consecutive segments in middle"

    # Should NOT match /api/v1/admin (segments not consecutive)
    event3 = create_event_with_segments([@api_segment.id, @v1_segment.id, @admin_segment.id])
    refute PathRuleMatcher.matches?(rule, event3), "Should not match non-consecutive segments"
  end
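
The multi-segment contains case is the subtle one: the rule's segment IDs must appear consecutively inside the event's request_segment_ids, not merely as a scattered subsequence. One way to evaluate all four match types over integer ID arrays (names are illustrative; the real matcher may be structured differently):

  def self.segments_match?(match_type, rule_ids, event_ids)
    case match_type
    when "exact"  then event_ids == rule_ids
    when "prefix" then event_ids.first(rule_ids.length) == rule_ids
    when "suffix" then event_ids.last(rule_ids.length) == rule_ids
    when "contains"
      # each_cons yields every run of consecutive IDs of the right length
      event_ids.each_cons(rule_ids.length).include?(rule_ids)
    else false
    end
  end

Comparing integer IDs keeps matching cheap and, together with PathSegment's lowercase normalization, makes it case-insensitive.
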
test "case insensitive matching through PathSegment normalization" do
|
||||||
|
# PathSegment.find_or_create_segment normalizes to lowercase
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/Admin/Users", # Mixed case
|
||||||
|
match_type: "exact",
|
||||||
|
action: "deny",
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
# Event with lowercase path should match
|
||||||
|
event = create_event_with_segments([@admin_segment.id, @users_segment.id])
|
||||||
|
assert PathRuleMatcher.matches?(rule, event), "Should match case-insensitively"
|
||||||
|
end
|
||||||
|
|
||||||
|
test "matching_rules returns all matching rules" do
|
||||||
|
rule1 = Rule.create_path_pattern_rule(pattern: "/admin", match_type: "prefix", action: "deny", user: @user)
|
||||||
|
rule2 = Rule.create_path_pattern_rule(pattern: "/admin/users", match_type: "exact", action: "allow", user: @user)
|
||||||
|
rule3 = Rule.create_path_pattern_rule(pattern: "/api", match_type: "prefix", action: "deny", user: @user)
|
||||||
|
|
||||||
|
event = create_event_with_segments([@admin_segment.id, @users_segment.id])
|
||||||
|
|
||||||
|
matching = PathRuleMatcher.matching_rules(event)
|
||||||
|
assert_includes matching, rule1, "Should include prefix rule"
|
||||||
|
assert_includes matching, rule2, "Should include exact rule"
|
||||||
|
refute_includes matching, rule3, "Should not include non-matching rule"
|
||||||
|
end
|
||||||
|
|
||||||
|
test "evaluate returns first matching action" do
|
||||||
|
Rule.create_path_pattern_rule(pattern: "/admin", match_type: "prefix", action: "deny", user: @user)
|
||||||
|
|
||||||
|
event = create_event_with_segments([@admin_segment.id, @dashboard_segment.id])
|
||||||
|
|
||||||
|
action = PathRuleMatcher.evaluate(event)
|
||||||
|
assert_equal "deny", action, "Should return deny action"
|
||||||
|
end
|
||||||
|
|
||||||
|
test "evaluate returns allow for non-matching event" do
|
||||||
|
Rule.create_path_pattern_rule(pattern: "/admin", match_type: "exact", action: "deny", user: @user)
|
||||||
|
|
||||||
|
event = create_event_with_segments([@api_segment.id])
|
||||||
|
|
||||||
|
action = PathRuleMatcher.evaluate(event)
|
||||||
|
assert_equal "allow", action, "Should return allow for non-matching event"
|
||||||
|
end
|
||||||
|
|
||||||
|
test "does not match disabled rules" do
|
||||||
|
rule = Rule.create_path_pattern_rule(pattern: "/admin", match_type: "exact", action: "deny", user: @user)
|
||||||
|
rule.update!(enabled: false)
|
||||||
|
|
||||||
|
event = create_event_with_segments([@admin_segment.id])
|
||||||
|
|
||||||
|
matching = PathRuleMatcher.matching_rules(event)
|
||||||
|
assert_empty matching, "Should not match disabled rules"
|
||||||
|
end
|
||||||
|
|
||||||
|
test "does not match expired rules" do
|
||||||
|
rule = Rule.create_path_pattern_rule(pattern: "/admin", match_type: "exact", action: "deny", user: @user)
|
||||||
|
rule.update!(expires_at: 1.hour.ago)
|
||||||
|
|
||||||
|
event = create_event_with_segments([@admin_segment.id])
|
||||||
|
|
||||||
|
matching = PathRuleMatcher.matching_rules(event)
|
||||||
|
assert_empty matching, "Should not match expired rules"
|
||||||
|
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
def create_event_with_segments(segment_ids)
|
||||||
|
Event.new(
|
||||||
|
request_id: SecureRandom.uuid,
|
||||||
|
timestamp: Time.current,
|
||||||
|
request_segment_ids: segment_ids,
|
||||||
|
ip_address: "1.2.3.4"
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
def create_segment(text)
|
||||||
|
PathSegment.find_or_create_segment(text)
|
||||||
|
end
|
||||||
|
end
|
||||||
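
Finally, the evaluate and matching_rules behaviour pinned down above (first match wins, disabled and expired rules are skipped, default verdict "allow") could be implemented along these lines; the query shape is an assumption based on the attributes the tests touch:

  def self.matching_rules(event)
    Rule.where(waf_rule_type: "path_pattern", enabled: true)
        .where("expires_at IS NULL OR expires_at > ?", Time.current)
        .select { |rule| matches?(rule, event) }
  end

  def self.evaluate(event)
    matching_rules(event).first&.waf_action || "allow"
  end
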