Drop add_headers - headers can now be added to meta[] to be applied for any action. Consilidate Tagging in a service

This commit is contained in:
Dan Milne
2025-11-30 13:18:17 +11:00
parent de2eb43e2b
commit 179563022e
9 changed files with 157 additions and 97 deletions

View File

@@ -261,12 +261,6 @@ def process_quick_create_parameters
# Ensure metadata is a hash # Ensure metadata is a hash
@rule.metadata = {} unless @rule.metadata.is_a?(Hash) @rule.metadata = {} unless @rule.metadata.is_a?(Hash)
# Handle add_header fields - use provided params or existing metadata values
if @rule.add_header_action? && (params[:header_name].present? || params[:header_value].present?)
@rule.metadata['header_name'] = params[:header_name].presence || @rule.metadata['header_name'] || 'X-Bot-Agent'
@rule.metadata['header_value'] = params[:header_value].presence || @rule.metadata['header_value'] || 'Unknown'
end
# Handle expires_at parsing for text input # Handle expires_at parsing for text input
if params.dig(:rule, :expires_at).present? if params.dig(:rule, :expires_at).present?
expires_at_str = params[:rule][:expires_at].strip expires_at_str = params[:rule][:expires_at].strip

View File

@@ -226,8 +226,8 @@ class Event < ApplicationRecord
# Normalize headers in payload during import phase # Normalize headers in payload during import phase
normalized_payload = normalize_payload_headers(payload) normalized_payload = normalize_payload_headers(payload)
# Create the WAF request event # Create the WAF request event with agent-provided tags
create!( event = create!(
request_id: request_id, request_id: request_id,
timestamp: parse_timestamp(normalized_payload["timestamp"]), timestamp: parse_timestamp(normalized_payload["timestamp"]),
payload: normalized_payload, payload: normalized_payload,
@@ -250,11 +250,18 @@ class Event < ApplicationRecord
server_name: normalized_payload["server_name"], server_name: normalized_payload["server_name"],
environment: normalized_payload["environment"], environment: normalized_payload["environment"],
# Tags: start with agent-provided tags only
tags: normalized_payload["tags"] || [],
# WAF agent info # WAF agent info
agent_version: normalized_payload.dig("agent", "version"), agent_version: normalized_payload.dig("agent", "version"),
agent_name: normalized_payload.dig("agent", "name") agent_name: normalized_payload.dig("agent", "name")
) )
# Apply rule tags using EventTagger service
EventTagger.tag_event(event)
event
end end
# Normalize headers in payload to lower case during import phase # Normalize headers in payload to lower case during import phase
@@ -347,7 +354,10 @@ class Event < ApplicationRecord
def tags def tags
# Use the dedicated tags column (array), fallback to payload during transition # Use the dedicated tags column (array), fallback to payload during transition
super.presence || (payload&.dig("tags") || []) # Ensure we always return an Array, even if payload has malformed data (e.g., {} instead of [])
result = super.presence || payload&.dig("tags")
return [] if result.nil?
result.is_a?(Array) ? result : []
end end
def headers def headers

View File

@@ -34,8 +34,10 @@ class EventDdb
SQL SQL
# Convert to hash like ActiveRecord .group.count returns # Convert to hash like ActiveRecord .group.count returns
# DuckDB returns arrays: [waf_action, count] # DuckDB returns integer enum values, map to string names
result.to_a.to_h { |row| [row[0], row[1]] } # 0=deny, 1=allow, 2=redirect, 3=challenge, 4=log
action_map = { 0 => "deny", 1 => "allow", 2 => "redirect", 3 => "challenge", 4 => "log" }
result.to_a.to_h { |row| [action_map[row[0]] || "unknown", row[1]] }
end end
rescue StandardError => e rescue StandardError => e
Rails.logger.error "[EventDdb] Error in breakdown_by_action: #{e.message}" Rails.logger.error "[EventDdb] Error in breakdown_by_action: #{e.message}"

View File

@@ -7,7 +7,8 @@
class Rule < ApplicationRecord class Rule < ApplicationRecord
# Rule enums (prefix needed to avoid rate_limit collision) # Rule enums (prefix needed to avoid rate_limit collision)
# Canonical WAF action order - aligned with Agent and Event models # Canonical WAF action order - aligned with Agent and Event models
enum :waf_action, { deny: 0, allow: 1, redirect: 2, challenge: 3, log: 4, add_header: 5 }, prefix: :action # Note: allow and log actions can include headers/tags in metadata for automatic injection
enum :waf_action, { deny: 0, allow: 1, redirect: 2, challenge: 3, log: 4 }, prefix: :action
enum :waf_rule_type, { network: 0, rate_limit: 1, path_pattern: 2 }, prefix: :type enum :waf_rule_type, { network: 0, rate_limit: 1, path_pattern: 2 }, prefix: :type
SOURCES = %w[manual auto:scanner_detected auto:rate_limit_exceeded auto:bot_detected imported default manual:surgical_block manual:surgical_exception policy].freeze SOURCES = %w[manual auto:scanner_detected auto:rate_limit_exceeded auto:bot_detected imported default manual:surgical_block manual:surgical_exception policy].freeze
@@ -120,10 +121,6 @@ class Rule < ApplicationRecord
action_challenge? action_challenge?
end end
def add_header_action?
action_add_header?
end
# Redirect/challenge convenience methods # Redirect/challenge convenience methods
def redirect_url def redirect_url
metadata_hash['redirect_url'] metadata_hash['redirect_url']
@@ -141,14 +138,6 @@ class Rule < ApplicationRecord
metadata&.dig('challenge_message') metadata&.dig('challenge_message')
end end
def header_name
metadata&.dig('header_name')
end
def header_value
metadata&.dig('header_value')
end
# Tag-related methods # Tag-related methods
def tags def tags
metadata_hash['tags'] || [] metadata_hash['tags'] || []
@@ -469,12 +458,6 @@ class Rule < ApplicationRecord
if source&.start_with?('auto:') || source == 'default' if source&.start_with?('auto:') || source == 'default'
self.user ||= User.find_by(role: 1) # admin role self.user ||= User.find_by(role: 1) # admin role
end end
# Set default header values for add_header action
if add_header_action?
self.metadata['header_name'] ||= 'X-Bot-Agent'
self.metadata['header_value'] ||= 'Unknown'
end
end end
def calculate_priority_for_network_rules def calculate_priority_for_network_rules
@@ -558,13 +541,6 @@ class Rule < ApplicationRecord
if challenge_type_value && !%w[captcha javascript proof_of_work].include?(challenge_type_value) if challenge_type_value && !%w[captcha javascript proof_of_work].include?(challenge_type_value)
errors.add(:metadata, "challenge_type must be one of: captcha, javascript, proof_of_work") errors.add(:metadata, "challenge_type must be one of: captcha, javascript, proof_of_work")
end end
when "add_header"
unless metadata&.dig("header_name").present?
errors.add(:metadata, "must include 'header_name' for add_header action")
end
unless metadata&.dig("header_value").present?
errors.add(:metadata, "must include 'header_value' for add_header action")
end
end end
end end

View File

@@ -9,7 +9,7 @@ class WafPolicy < ApplicationRecord
POLICY_TYPES = %w[country asn company network_type path_pattern].freeze POLICY_TYPES = %w[country asn company network_type path_pattern].freeze
# Actions - what to do when traffic matches this policy # Actions - what to do when traffic matches this policy
ACTIONS = %w[allow deny redirect challenge add_header].freeze ACTIONS = %w[allow deny redirect challenge log].freeze
# Associations # Associations
belongs_to :user belongs_to :user
@@ -25,7 +25,6 @@ validate :targets_must_be_array
validate :validate_targets_by_type validate :validate_targets_by_type
validate :validate_redirect_configuration, if: :redirect_policy_action? validate :validate_redirect_configuration, if: :redirect_policy_action?
validate :validate_challenge_configuration, if: :challenge_policy_action? validate :validate_challenge_configuration, if: :challenge_policy_action?
validate :validate_add_header_configuration, if: :add_header_policy_action?
# Scopes # Scopes
scope :enabled, -> { where(enabled: true) } scope :enabled, -> { where(enabled: true) }
@@ -96,10 +95,6 @@ validate :targets_must_be_array
policy_action == 'challenge' policy_action == 'challenge'
end end
def add_header_policy_action?
policy_action == 'add_header'
end
# Lifecycle methods # Lifecycle methods
def active? def active?
enabled? && !expired? enabled? && !expired?
@@ -168,7 +163,7 @@ validate :targets_must_be_array
priority: network_range.prefix_length priority: network_range.prefix_length
) )
# Handle redirect/challenge/add_header specific data # Handle redirect/challenge specific data
if redirect_action? && additional_data['redirect_url'] if redirect_action? && additional_data['redirect_url']
rule.update!( rule.update!(
metadata: rule.metadata.merge( metadata: rule.metadata.merge(
@@ -183,13 +178,6 @@ validate :targets_must_be_array
challenge_message: additional_data['challenge_message'] challenge_message: additional_data['challenge_message']
) )
) )
elsif add_header_action?
rule.update!(
metadata: rule.metadata.merge(
header_name: additional_data['header_name'],
header_value: additional_data['header_value']
)
)
end end
rule rule
@@ -224,7 +212,7 @@ validate :targets_must_be_array
priority: 50 # Default priority for path rules priority: 50 # Default priority for path rules
) )
# Handle redirect/challenge/add_header specific data # Handle redirect/challenge specific data
if redirect_action? && additional_data['redirect_url'] if redirect_action? && additional_data['redirect_url']
rule.update!( rule.update!(
metadata: rule.metadata.merge( metadata: rule.metadata.merge(
@@ -239,13 +227,6 @@ validate :targets_must_be_array
challenge_message: additional_data['challenge_message'] challenge_message: additional_data['challenge_message']
) )
) )
elsif add_header_action?
rule.update!(
metadata: rule.metadata.merge(
header_name: additional_data['header_name'],
header_value: additional_data['header_value']
)
)
end end
rule rule
@@ -365,12 +346,6 @@ validate :targets_must_be_array
self.targets ||= [] self.targets ||= []
self.additional_data ||= {} self.additional_data ||= {}
self.enabled = true if enabled.nil? self.enabled = true if enabled.nil?
# Set default header values for add_header action
if add_header_policy_action?
self.additional_data['header_name'] ||= 'X-Bot-Agent'
self.additional_data['header_value'] ||= 'Unknown'
end
end end
def targets_must_be_array def targets_must_be_array
@@ -455,15 +430,6 @@ validate :targets_must_be_array
end end
end end
def validate_add_header_configuration
if additional_data['header_name'].blank?
errors.add(:additional_data, "must include 'header_name' for add_header action")
end
if additional_data['header_value'].blank?
errors.add(:additional_data, "must include 'header_value' for add_header action")
end
end
# Matching logic for different policy types # Matching logic for different policy types
def matches_country?(network_range) def matches_country?(network_range)
country = network_range.country || network_range.inherited_intelligence[:country] country = network_range.country || network_range.inherited_intelligence[:country]

View File

@@ -25,7 +25,7 @@
<div> <div>
<%= form.label :waf_action, "Action", class: "block text-sm font-medium text-gray-700" %> <%= form.label :waf_action, "Action", class: "block text-sm font-medium text-gray-700" %>
<%= form.select :waf_action, <%= form.select :waf_action,
options_for_select([['All', ''], ['Allow', 'allow'], ['Deny', 'deny'], ['Redirect', 'redirect'], ['Challenge', 'challenge'], ['Add Header', 'add_header']], params[:waf_action]), options_for_select([['All', ''], ['Allow', 'allow'], ['Deny', 'deny'], ['Redirect', 'redirect'], ['Challenge', 'challenge'], ['Log', 'log']], params[:waf_action]),
{ }, { class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm" } %> { }, { class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm" } %>
</div> </div>
<div> <div>

View File

@@ -159,27 +159,6 @@
</div> </div>
</div> </div>
<!-- Add Header Fields (shown for add_header action) -->
<div id="add_header_section" class="hidden space-y-4" data-rule-form-target="addHeaderSection">
<div>
<%= label_tag :header_name, "Header Name", class: "block text-sm font-medium text-gray-700" %>
<%= text_field_tag :header_name, "",
class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm",
placeholder: "X-Bot-Agent",
id: "header_name_input" %>
<p class="mt-2 text-sm text-gray-500">The HTTP header name to add (e.g., X-Bot-Agent, X-Network-Type)</p>
</div>
<div>
<%= label_tag :header_value, "Header Value", class: "block text-sm font-medium text-gray-700" %>
<%= text_field_tag :header_value, "",
class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm",
placeholder: "BingBot",
id: "header_value_input" %>
<p class="mt-2 text-sm text-gray-500">The value for the header (e.g., BingBot, GoogleBot, Unknown)</p>
</div>
</div>
<!-- Metadata --> <!-- Metadata -->
<div data-controller="json-validator" data-json-validator-valid-class="json-valid" data-json-validator-invalid-class="json-invalid" data-json-validator-valid-status-class="json-valid-status" data-json-validator-invalid-status-class="json-invalid-status"> <div data-controller="json-validator" data-json-validator-valid-class="json-valid" data-json-validator-invalid-class="json-invalid" data-json-validator-valid-status-class="json-valid-status" data-json-validator-invalid-status-class="json-invalid-status">
<%= form.label :metadata, "Metadata", class: "block text-sm font-medium text-gray-700" %> <%= form.label :metadata, "Metadata", class: "block text-sm font-medium text-gray-700" %>

View File

@@ -0,0 +1,6 @@
# frozen_string_literal: true
# Configure DeviceDetector cache
# Default is 5,000 entries - we increase to 10,000 for better hit rate
# Memory usage: ~1-2MB for 10k cached user agents
DeviceDetector.config.max_cache_keys = 10_000

127
lib/tasks/duckdb.rake Normal file
View File

@@ -0,0 +1,127 @@
# frozen_string_literal: true
namespace :duckdb do
desc "Rebuild DuckDB analytics database from scratch"
task rebuild: :environment do
puts "=" * 80
puts "DuckDB Rebuild"
puts "=" * 80
puts
duckdb_path = Rails.root.join("storage", "analytics.duckdb")
# Step 1: Check if DuckDB exists
if File.exist?(duckdb_path)
puts "🗑️ Deleting existing DuckDB database..."
File.delete(duckdb_path)
puts " ✅ Deleted: #{duckdb_path}"
puts
else
puts " No existing DuckDB database found"
puts
end
# Step 2: Rebuild from PostgreSQL
puts "🔨 Rebuilding DuckDB from PostgreSQL events..."
puts
start_time = Time.current
begin
SyncEventsToDuckdbJob.perform_now
duration = Time.current - start_time
# Step 3: Verify the rebuild
event_count = AnalyticsDuckdbService.instance.event_count
bot_count = AnalyticsDuckdbService.instance.with_connection do |conn|
result = conn.query("SELECT COUNT(*) FROM events WHERE is_bot = true")
result.first&.first || 0
end
puts "=" * 80
puts "✅ DuckDB Rebuild Complete!"
puts "=" * 80
puts " Duration: #{duration.round(2)}s"
puts " Total events synced: #{event_count}"
puts " Bot events: #{bot_count} (#{(bot_count.to_f / event_count * 100).round(1)}%)" if event_count > 0
puts " Human events: #{event_count - bot_count} (#{((event_count - bot_count).to_f / event_count * 100).round(1)}%)" if event_count > 0
puts
puts "📂 Database location: #{duckdb_path}"
puts "📊 Database size: #{File.size(duckdb_path) / 1024.0 / 1024.0}MB"
puts
rescue => e
puts "❌ Error rebuilding DuckDB: #{e.message}"
puts e.backtrace.first(5).join("\n")
exit 1
end
end
desc "Show DuckDB statistics"
task stats: :environment do
duckdb_path = Rails.root.join("storage", "analytics.duckdb")
unless File.exist?(duckdb_path)
puts "❌ DuckDB database not found at: #{duckdb_path}"
exit 1
end
puts "=" * 80
puts "DuckDB Statistics"
puts "=" * 80
puts
total = AnalyticsDuckdbService.instance.event_count
AnalyticsDuckdbService.instance.with_connection do |conn|
# Bot breakdown
result = conn.query(<<~SQL)
SELECT
is_bot,
COUNT(*) as event_count,
COUNT(DISTINCT ip_address) as unique_ips
FROM events
GROUP BY is_bot
SQL
puts "📊 Bot Traffic Breakdown:"
result.each do |row|
type = row[0] ? "🤖 Bots" : "👤 Humans"
count = row[1]
ips = row[2]
percentage = (count.to_f / total * 100).round(1)
puts " #{type}: #{count} events (#{percentage}%) from #{ips} unique IPs"
end
puts
# Date range
range_result = conn.query("SELECT MIN(timestamp), MAX(timestamp) FROM events")
min_ts, max_ts = range_result.first
puts "📅 Date Range:"
puts " Oldest event: #{min_ts}"
puts " Newest event: #{max_ts}"
puts
# Database info
puts "💾 Database Info:"
puts " Location: #{duckdb_path}"
puts " Size: #{(File.size(duckdb_path) / 1024.0 / 1024.0).round(2)}MB"
puts " Total events: #{total}"
puts
end
end
desc "Sync new events from PostgreSQL to DuckDB"
task sync: :environment do
puts "🔄 Syncing events from PostgreSQL to DuckDB..."
start_time = Time.current
begin
SyncEventsToDuckdbJob.perform_now
duration = Time.current - start_time
puts "✅ Sync complete in #{duration.round(2)}s"
rescue => e
puts "❌ Error syncing: #{e.message}"
exit 1
end
end
end