Accepts incoming events and correctly parses them into events. GeoLite2 integration complete"

This commit is contained in:
Dan Milne
2025-11-04 00:11:10 +11:00
parent 0cbd462e7c
commit 5ff166613e
49 changed files with 4489 additions and 322 deletions

View File

@@ -45,6 +45,12 @@ gem "image_processing", "~> 1.2"
# Pagination # Pagination
gem "pagy" gem "pagy"
# MaxMind GeoIP database reader
gem "maxmind-db"
# HTTP client for database downloads
gem "httparty"
group :development, :test do group :development, :test do
# See https://guides.rubyonrails.org/debugging_rails_applications.html#debugging-with-the-debug-gem # See https://guides.rubyonrails.org/debugging_rails_applications.html#debugging-with-the-debug-gem
gem "debug", platforms: %i[ mri windows ], require: "debug/prelude" gem "debug", platforms: %i[ mri windows ], require: "debug/prelude"

View File

@@ -102,6 +102,7 @@ GEM
concurrent-ruby (1.3.5) concurrent-ruby (1.3.5)
connection_pool (2.5.4) connection_pool (2.5.4)
crass (1.0.6) crass (1.0.6)
csv (3.3.5)
date (3.5.0) date (3.5.0)
debug (1.11.0) debug (1.11.0)
irb (~> 1.10) irb (~> 1.10)
@@ -125,6 +126,10 @@ GEM
raabro (~> 1.4) raabro (~> 1.4)
globalid (1.3.0) globalid (1.3.0)
activesupport (>= 6.1) activesupport (>= 6.1)
httparty (0.23.2)
csv
mini_mime (>= 1.0.0)
multi_xml (>= 0.5.2)
i18n (1.14.7) i18n (1.14.7)
concurrent-ruby (~> 1.0) concurrent-ruby (~> 1.0)
image_processing (1.14.0) image_processing (1.14.0)
@@ -168,11 +173,14 @@ GEM
net-smtp net-smtp
marcel (1.1.0) marcel (1.1.0)
matrix (0.4.3) matrix (0.4.3)
maxmind-db (1.3.2)
mini_magick (5.3.1) mini_magick (5.3.1)
logger logger
mini_mime (1.1.5) mini_mime (1.1.5)
minitest (5.26.0) minitest (5.26.0)
msgpack (1.8.0) msgpack (1.8.0)
multi_xml (0.7.2)
bigdecimal (~> 3.1)
net-imap (0.5.12) net-imap (0.5.12)
date date
net-protocol net-protocol
@@ -402,10 +410,12 @@ DEPENDENCIES
bundler-audit bundler-audit
capybara capybara
debug debug
httparty
image_processing (~> 1.2) image_processing (~> 1.2)
importmap-rails importmap-rails
jbuilder jbuilder
kamal kamal
maxmind-db
pagy pagy
propshaft propshaft
puma (>= 5.0) puma (>= 5.0)

View File

@@ -1,6 +1,6 @@
# Baffle Hub # Baffle Hub
**Rails 8 WAF analytics and automated rule management system** **Rails 8 WAF analytics and automated rule management system** ⚠️ **Experimental**
Baffle Hub provides intelligent Web Application Firewall (WAF) analytics with automated rule generation. It combines real-time threat detection with SQLite-based local storage for ultra-fast request filtering. Baffle Hub provides intelligent Web Application Firewall (WAF) analytics with automated rule generation. It combines real-time threat detection with SQLite-based local storage for ultra-fast request filtering.
@@ -12,12 +12,36 @@ Baffle Hub provides intelligent Web Application Firewall (WAF) analytics with au
- **Forward Auth Integration** - Compatible with Caddy, Traefik, and NGINX - **Forward Auth Integration** - Compatible with Caddy, Traefik, and NGINX
- **Docker Ready** - Containerized deployment with Kamal - **Docker Ready** - Containerized deployment with Kamal
## Status
### ✅ Complete
- Event ingestion API with DSN authentication
- Comprehensive data normalization (hosts, paths, IPs)
- Basic analytics dashboard
- Background job processing system
- Docker deployment setup
### 🚧 In Progress
- Rule management framework
- IP range blocking rules
- Country-based blocking (via IP ranges)
- Forward auth endpoint implementation
### 📋 TODO
- Advanced pattern analysis and threat detection
- Automatic rule generation algorithms
- Rate limiting engine
- Challenge/redirect mechanisms
- Unix socket support for ultra-low latency
- Multi-node rule synchronization
- Advanced analytics visualizations
- Real-time rule updates
## Quick Start ## Quick Start
### Prerequisites ### Prerequisites
- Ruby 3.x - Ruby 3.x
- PostgreSQL 14+
- Docker (optional) - Docker (optional)
### Installation ### Installation

View File

@@ -67,7 +67,8 @@ class Api::EventsController < ApplicationController
headers = {} headers = {}
important_headers.each do |header| important_headers.each do |header|
value = request.headers[header] value = request.headers[header]
headers[header] = value if value.present? # Standardize headers to lower case during import phase
headers[header.downcase] = value if value.present?
end end
headers headers

View File

@@ -0,0 +1,67 @@
# frozen_string_literal: true
module Api
class RulesController < ApplicationController
skip_before_action :verify_authenticity_token
before_action :authenticate_project!
before_action :check_project_enabled
# GET /api/:public_key/rules/version
# Quick version check - returns latest updated_at timestamp
def version
render json: {
version: Rule.latest_version,
count: Rule.active.count,
sampling: HubLoad.current_sampling
}
end
# GET /api/:public_key/rules?since=2024-11-03T12:00:00.000Z
# Incremental sync - returns rules updated since timestamp
# GET /api/:public_key/rules
# Full sync - returns all active rules
def index
rules = if params[:since].present?
# Incremental sync
since_time = parse_timestamp(params[:since])
Rule.since(since_time)
else
# Full sync - only return enabled rules
Rule.active.sync_order
end
render json: {
version: Rule.latest_version,
sampling: HubLoad.current_sampling,
rules: rules.map(&:to_agent_format)
}
rescue ArgumentError => e
render json: { error: "Invalid timestamp format: #{e.message}" }, status: :bad_request
end
private
def authenticate_project!
public_key = params[:public_key] || params[:project_id]
@project = Project.find_by(public_key: public_key)
unless @project
render json: { error: "Invalid project key" }, status: :unauthorized
return
end
end
def check_project_enabled
unless @project.enabled?
render json: { error: "Project is disabled" }, status: :forbidden
end
end
def parse_timestamp(timestamp_str)
Time.parse(timestamp_str)
rescue ArgumentError => e
raise ArgumentError, "Invalid timestamp format. Expected ISO8601 format (e.g., 2024-11-03T12:00:00.000Z)"
end
end
end

View File

@@ -0,0 +1,50 @@
# frozen_string_literal: true
# ExpiredRulesCleanupJob - Disables rules that have expired
#
# This job runs periodically (hourly) to find rules with expires_at in the past
# and disables them. Agents will pick up these disabled rules in their next sync
# and remove them from their local evaluation tables.
#
# Schedule: Every hour (configured in initializer or cron)
class ExpiredRulesCleanupJob < ApplicationJob
queue_as :default
def perform
expired_rules = Rule.enabled.expired
Rails.logger.info "ExpiredRulesCleanupJob: Found #{expired_rules.count} expired rules"
return if expired_rules.empty?
# Disable all expired rules in a single update
count = expired_rules.update_all(
enabled: false,
updated_at: Time.current
)
Rails.logger.info "ExpiredRulesCleanupJob: Disabled #{count} expired rules"
# Optionally: Clean up old disabled rules after a retention period
cleanup_old_disabled_rules if should_cleanup_old_rules?
count
end
private
def should_cleanup_old_rules?
# Only cleanup on first hour of the day to avoid too frequent deletion
Time.current.hour == 1
end
def cleanup_old_disabled_rules
# Delete disabled rules older than 30 days (keep for audit trail)
old_rules = Rule.disabled.where("updated_at < ?", 30.days.ago)
if old_rules.any?
count = old_rules.delete_all
Rails.logger.info "ExpiredRulesCleanupJob: Deleted #{count} old disabled rules (>30 days)"
end
end
end

View File

@@ -0,0 +1,117 @@
# frozen_string_literal: true
# PathScannerDetectorJob - Detects IPs hitting scanner paths and auto-bans them
#
# This job analyzes recent events to find IPs hitting common scanner/bot paths
# like /.env, /.git, /wp-admin, etc. When detected, it creates temporary IP
# block rules that expire after 24 hours.
#
# Schedule: Every 5 minutes (configured in initializer or cron)
class PathScannerDetectorJob < ApplicationJob
queue_as :default
# Common paths that scanners/bots hit
SCANNER_PATHS = %w[
/.env
/.git
/wp-admin
/wp-login.php
/phpMyAdmin
/phpmyadmin
/.aws
/.ssh
/admin
/administrator
/.config
/backup
/db_backup
/.DS_Store
/web.config
].freeze
# Minimum hits to be considered a scanner
MIN_SCANNER_HITS = 3
# Look back window
LOOKBACK_WINDOW = 5.minutes
# Ban duration
BAN_DURATION = 24.hours
def perform
scanner_ips = detect_scanner_ips
Rails.logger.info "PathScannerDetectorJob: Found #{scanner_ips.count} scanner IPs"
scanner_ips.each do |ip_data|
create_ban_rule(ip_data)
end
scanner_ips.count
end
private
def detect_scanner_ips
# Find IPs that have hit scanner paths multiple times recently
Event
.where("timestamp > ?", LOOKBACK_WINDOW.ago)
.where("request_path IN (?)", SCANNER_PATHS)
.group(:ip_address)
.select("ip_address, COUNT(*) as hit_count, GROUP_CONCAT(DISTINCT request_path) as paths")
.having("COUNT(*) >= ?", MIN_SCANNER_HITS)
.map do |event|
{
ip: event.ip_address,
hit_count: event.hit_count,
paths: event.paths.to_s.split(",")
}
end
end
def create_ban_rule(ip_data)
ip = ip_data[:ip]
# Check if rule already exists for this IP
existing_rule = Rule.active.network_rules.find_by(
"conditions ->> 'cidr' = ?", "#{ip}/32"
)
if existing_rule
Rails.logger.info "PathScannerDetectorJob: Rule already exists for #{ip}, skipping"
return
end
# Determine if IPv4 or IPv6
addr = IPAddr.new(ip)
rule_type = addr.ipv4? ? "network_v4" : "network_v6"
# Create the ban rule
rule = Rule.create!(
rule_type: rule_type,
action: "deny",
conditions: { cidr: "#{ip}/32" },
priority: 32,
expires_at: BAN_DURATION.from_now,
source: "auto:scanner_detected",
enabled: true,
metadata: {
reason: "Scanner detected: hit #{ip_data[:paths].join(', ')}",
hit_count: ip_data[:hit_count],
paths: ip_data[:paths],
detected_at: Time.current.iso8601,
auto_generated: true
}
)
Rails.logger.info "PathScannerDetectorJob: Created ban rule #{rule.id} for #{ip} (expires: #{rule.expires_at})"
rule
rescue IPAddr::InvalidAddressError => e
Rails.logger.error "PathScannerDetectorJob: Invalid IP address #{ip}: #{e.message}"
nil
rescue ActiveRecord::RecordInvalid => e
Rails.logger.error "PathScannerDetectorJob: Failed to create rule for #{ip}: #{e.message}"
nil
end
end

View File

@@ -28,6 +28,15 @@ class ProcessWafEventJob < ApplicationJob
# Create the WAF event record # Create the WAF event record
event = Event.create_from_waf_payload!(event_id, single_event_data, project) event = Event.create_from_waf_payload!(event_id, single_event_data, project)
# Enrich with geo-location data if missing
if event.ip_address.present? && event.country_code.blank?
begin
event.enrich_geo_location!
rescue => e
Rails.logger.warn "Failed to enrich geo location for event #{event.id}: #{e.message}"
end
end
# Trigger analytics processing # Trigger analytics processing
ProcessWafAnalyticsJob.perform_later(project_id: project_id, event_id: event.id) ProcessWafAnalyticsJob.perform_later(project_id: project_id, event_id: event.id)

View File

@@ -0,0 +1,66 @@
# frozen_string_literal: true
class UpdateGeoIpDatabaseJob < ApplicationJob
queue_as :default
# Schedule this job to run weekly to keep the GeoIP database updated
# Use: UpdateGeoIpDatabaseJob.set(wait: 1.week).perform_later
# Or set up in config/schedule.rb for recurring execution
def perform(force_update: false)
return unless auto_update_enabled?
Rails.logger.info "Starting GeoIP database update check"
if should_update_database? || force_update
success = GeoIpService.update_database!
if success
Rails.logger.info "GeoIP database successfully updated"
else
Rails.logger.error "Failed to update GeoIP database"
end
else
Rails.logger.info "GeoIP database is up to date, no update needed"
end
# No cleanup needed with file-system approach
rescue => e
Rails.logger.error "Error in UpdateGeoIpDatabaseJob: #{e.message}"
Rails.logger.error e.backtrace.join("\n")
end
private
def auto_update_enabled?
Rails.application.config.maxmind.auto_update_enabled
end
def should_update_database?
config = Rails.application.config.maxmind
database_path = default_database_path
# Check if database file exists
return true unless File.exist?(database_path)
# Check if database is outdated
max_age_days = config.max_age_days
file_mtime = File.mtime(database_path)
return true if file_mtime < max_age_days.days.ago
# Check if database file is readable and valid
begin
# Try to open the database to verify it's valid
MaxMind::DB.new(database_path)
false
rescue => e
Rails.logger.warn "GeoIP database file appears to be corrupted: #{e.message}"
true
end
end
def default_database_path
config = Rails.application.config.maxmind
File.join(config.storage_path, config.database_filename)
end
end

View File

@@ -88,40 +88,63 @@ class Event < ApplicationRecord
after_validation :normalize_event_fields, if: :should_normalize? after_validation :normalize_event_fields, if: :should_normalize?
def self.create_from_waf_payload!(event_id, payload, project) def self.create_from_waf_payload!(event_id, payload, project)
# Normalize headers in payload during import phase
normalized_payload = normalize_payload_headers(payload)
# Create the WAF request event # Create the WAF request event
create!( create!(
project: project, project: project,
event_id: event_id, event_id: event_id,
timestamp: parse_timestamp(payload["timestamp"]), timestamp: parse_timestamp(normalized_payload["timestamp"]),
payload: payload, payload: normalized_payload,
# WAF-specific fields # WAF-specific fields
ip_address: payload.dig("request", "ip"), ip_address: normalized_payload.dig("request", "ip"),
user_agent: payload.dig("request", "headers", "User-Agent"), user_agent: normalized_payload.dig("request", "headers", "user-agent") || normalized_payload.dig("request", "headers", "User-Agent"),
request_method: payload.dig("request", "method")&.downcase, # request_method will be set by extract_fields_from_payload + normalize_event_fields
request_path: payload.dig("request", "path"), request_path: normalized_payload.dig("request", "path"),
request_url: payload.dig("request", "url"), request_url: normalized_payload.dig("request", "url"),
request_protocol: payload.dig("request", "protocol"), # request_protocol will be set by extract_fields_from_payload + normalize_event_fields
response_status: payload.dig("response", "status_code"), response_status: normalized_payload.dig("response", "status_code"),
response_time_ms: payload.dig("response", "duration_ms"), response_time_ms: normalized_payload.dig("response", "duration_ms"),
waf_action: normalize_action(payload["waf_action"]), # Normalize incoming action values waf_action: normalize_action(normalized_payload["waf_action"]), # Normalize incoming action values
rule_matched: payload["rule_matched"], rule_matched: normalized_payload["rule_matched"],
blocked_reason: payload["blocked_reason"], blocked_reason: normalized_payload["blocked_reason"],
# Server/Environment info # Server/Environment info
server_name: payload["server_name"], server_name: normalized_payload["server_name"],
environment: payload["environment"], environment: normalized_payload["environment"],
# Geographic data # Geographic data
country_code: payload.dig("geo", "country_code"), country_code: normalized_payload.dig("geo", "country_code"),
city: payload.dig("geo", "city"), city: normalized_payload.dig("geo", "city"),
# WAF agent info # WAF agent info
agent_version: payload.dig("agent", "version"), agent_version: normalized_payload.dig("agent", "version"),
agent_name: payload.dig("agent", "name") agent_name: normalized_payload.dig("agent", "name")
) )
end end
# Normalize headers in payload to lower case during import phase
def self.normalize_payload_headers(payload)
return payload unless payload.is_a?(Hash)
# Create a deep copy to avoid modifying the original
normalized = Marshal.load(Marshal.dump(payload))
# Normalize request headers
if normalized.dig("request", "headers")&.is_a?(Hash)
normalized["request"]["headers"] = normalized["request"]["headers"].transform_keys(&:downcase)
end
# Normalize response headers if they exist
if normalized.dig("response", "headers")&.is_a?(Hash)
normalized["response"]["headers"] = normalized["response"]["headers"].transform_keys(&:downcase)
end
normalized
end
def self.normalize_action(action) def self.normalize_action(action)
return "allow" if action.nil? || action.blank? return "allow" if action.nil? || action.blank?
@@ -195,7 +218,8 @@ class Event < ApplicationRecord
end end
def headers def headers
payload&.dig("request", "headers") || {} raw_headers = payload&.dig("request", "headers") || {}
normalize_headers(raw_headers)
end end
def query_params def query_params
@@ -237,6 +261,69 @@ class Event < ApplicationRecord
URI.parse(request_url).hostname rescue nil URI.parse(request_url).hostname rescue nil
end end
# Normalize headers to lower case keys during import phase
def normalize_headers(headers)
return {} unless headers.is_a?(Hash)
headers.transform_keys(&:downcase)
end
# GeoIP enrichment methods
def enrich_geo_location!
return if ip_address.blank?
return if country_code.present? # Already has geo data
country = GeoIpService.lookup_country(ip_address)
update!(country_code: country) if country.present?
rescue => e
Rails.logger.error "Failed to enrich geo location for event #{id}: #{e.message}"
end
# Class method to enrich multiple events
def self.enrich_geo_location_batch(events = nil)
events ||= where(country_code: [nil, '']).where.not(ip_address: [nil, ''])
geo_service = GeoIpService.new
updated_count = 0
events.find_each do |event|
next if event.country_code.present?
country = geo_service.lookup_country(event.ip_address)
if country.present?
event.update!(country_code: country)
updated_count += 1
end
end
updated_count
end
# Lookup country code for this event's IP
def lookup_country
return country_code if country_code.present?
return nil if ip_address.blank?
GeoIpService.lookup_country(ip_address)
rescue => e
Rails.logger.error "GeoIP lookup failed for #{ip_address}: #{e.message}"
nil
end
# Check if event has valid geo location data
def has_geo_data?
country_code.present? || city.present?
end
# Get full geo location details
def geo_location
{
country_code: country_code,
city: city,
ip_address: ip_address,
has_data: has_geo_data?
}
end
private private
def should_normalize? def should_normalize?
@@ -257,7 +344,12 @@ class Event < ApplicationRecord
response_data = payload.dig("response") || {} response_data = payload.dig("response") || {}
self.ip_address = request_data["ip"] self.ip_address = request_data["ip"]
self.user_agent = request_data.dig("headers", "User-Agent")
# Extract user agent with header name standardization
headers = request_data["headers"] || {}
normalized_headers = normalize_headers(headers)
self.user_agent = normalized_headers["user-agent"] || normalized_headers["User-Agent"]
self.request_path = request_data["path"] self.request_path = request_data["path"]
self.request_url = request_data["url"] self.request_url = request_data["url"]
self.response_status = response_data["status_code"] self.response_status = response_data["status_code"]
@@ -265,10 +357,11 @@ class Event < ApplicationRecord
self.rule_matched = payload["rule_matched"] self.rule_matched = payload["rule_matched"]
self.blocked_reason = payload["blocked_reason"] self.blocked_reason = payload["blocked_reason"]
# Store original values for normalization (these will be normalized to IDs) # Store original values for normalization only if they don't exist yet
@raw_request_method = request_data["method"] # This prevents overwriting during multiple callback runs
@raw_request_protocol = request_data["protocol"] @raw_request_method ||= request_data["method"]
@raw_action = payload["waf_action"] @raw_request_protocol ||= request_data["protocol"]
@raw_action ||= payload["waf_action"]
# Extract server/environment info # Extract server/environment info
self.server_name = payload["server_name"] self.server_name = payload["server_name"]

171
app/models/ipv4_range.rb Normal file
View File

@@ -0,0 +1,171 @@
# frozen_string_literal: true
# Ipv4Range - Stores IPv4 network ranges with IP intelligence metadata
#
# Optimized for fast range lookups using network_start/network_end integers.
# Stores metadata about IP ranges including ASN, company, geographic info,
# and flags for datacenter/proxy/VPN detection.
class Ipv4Range < ApplicationRecord
# Validations
validates :network_start, presence: true
validates :network_end, presence: true
validates :network_prefix, presence: true,
numericality: { greater_than_or_equal_to: 0, less_than_or_equal_to: 32 }
# Callbacks
before_validation :calculate_range, if: -> { cidr.present? }
# Scopes for common queries
scope :datacenter, -> { where(is_datacenter: true) }
scope :proxy, -> { where(is_proxy: true) }
scope :vpn, -> { where(is_vpn: true) }
scope :by_country, ->(country) { where(ip_api_country: country) }
scope :by_company, ->(company) { where(company: company) }
scope :by_asn, ->(asn) { where(asn: asn) }
# Virtual attribute for setting IP via CIDR notation
attr_accessor :cidr
# Find ranges that contain a specific IPv4 address
def self.contains_ip(ip_string)
ip_addr = IPAddr.new(ip_string)
raise ArgumentError, "Not an IPv4 address" unless ip_addr.ipv4?
ip_int = ip_addr.to_i
where("? BETWEEN network_start AND network_end", ip_int)
.order(network_prefix: :desc) # Most specific first
end
# Check if this range contains a specific IP
def contains_ip?(ip_string)
ip_addr = IPAddr.new(ip_string)
return false unless ip_addr.ipv4?
ip_int = ip_addr.to_i
ip_int >= network_start && ip_int <= network_end
end
# Get CIDR notation for this range
def to_cidr
return nil unless network_start.present?
ip_addr = IPAddr.new(network_start, Socket::AF_INET)
"#{ip_addr}/#{network_prefix}"
end
# String representation
def to_s
to_cidr || "Ipv4Range##{id}"
end
# Convenience methods for JSON fields
def abuser_scores_hash
abuser_scores ? JSON.parse(abuser_scores) : {}
rescue JSON::ParserError
{}
end
def abuser_scores_hash=(hash)
self.abuser_scores = hash.to_json
end
def additional_data_hash
additional_data ? JSON.parse(additional_data) : {}
rescue JSON::ParserError
{}
end
def additional_data_hash=(hash)
self.additional_data = hash.to_json
end
# GeoIP lookup methods
def geo_lookup_country!
return if ip_api_country.present? || geo2_country.present?
# Use the first IP in the range for lookup
sample_ip = IPAddr.new(network_start, Socket::AF_INET).to_s
country = GeoIpService.lookup_country(sample_ip)
if country.present?
# Update both country fields for redundancy
update!(ip_api_country: country, geo2_country: country)
country
end
rescue => e
Rails.logger.error "Failed to lookup geo location for IPv4 range #{id}: #{e.message}"
nil
end
def geo_lookup_country
return ip_api_country if ip_api_country.present?
return geo2_country if geo2_country.present?
# Use the first IP in the range for lookup
sample_ip = IPAddr.new(network_start, Socket::AF_INET).to_s
GeoIpService.lookup_country(sample_ip)
rescue => e
Rails.logger.error "Failed to lookup geo location for IPv4 range #{id}: #{e.message}"
nil
end
# Check if this range has any country information
def has_country_info?
ip_api_country.present? || geo2_country.present?
end
# Get the best available country code
def primary_country
ip_api_country || geo2_country
end
# Class method to lookup country for any IP in the range
def self.lookup_country_by_ip(ip_string)
range = contains_ip(ip_string).first
return nil unless range
range.geo_lookup_country
end
# Class method to enrich ranges without country data
def self.enrich_missing_geo_data(limit: 1000)
ranges_without_geo = where(ip_api_country: [nil, ''], geo2_country: [nil, ''])
.limit(limit)
updated_count = 0
geo_service = GeoIpService.new
ranges_without_geo.find_each do |range|
country = geo_service.lookup_country(IPAddr.new(range.network_start, Socket::AF_INET).to_s)
if country.present?
range.update!(ip_api_country: country, geo2_country: country)
updated_count += 1
end
end
updated_count
end
private
# Calculate network_start and network_end from CIDR notation
def calculate_range
return unless cidr.present?
ip_addr = IPAddr.new(cidr)
raise ArgumentError, "Not an IPv4 CIDR" unless ip_addr.ipv4?
# Get prefix from CIDR
self.network_prefix = cidr.split("/").last.to_i
# Calculate network range
first_ip = ip_addr.to_range.first
last_ip = ip_addr.to_range.last
self.network_start = first_ip.to_i
self.network_end = last_ip.to_i
rescue IPAddr::InvalidAddressError => e
errors.add(:cidr, "invalid IPv4 CIDR notation: #{e.message}")
end
end

171
app/models/ipv6_range.rb Normal file
View File

@@ -0,0 +1,171 @@
# frozen_string_literal: true
# Ipv6Range - Stores IPv6 network ranges with IP intelligence metadata
#
# Optimized for fast range lookups using network_start/network_end binary storage.
# Stores metadata about IP ranges including ASN, company, geographic info,
# and flags for datacenter/proxy/VPN detection.
class Ipv6Range < ApplicationRecord
# Validations
validates :network_start, presence: true
validates :network_end, presence: true
validates :network_prefix, presence: true,
numericality: { greater_than_or_equal_to: 0, less_than_or_equal_to: 128 }
# Callbacks
before_validation :calculate_range, if: -> { cidr.present? }
# Scopes for common queries
scope :datacenter, -> { where(is_datacenter: true) }
scope :proxy, -> { where(is_proxy: true) }
scope :vpn, -> { where(is_vpn: true) }
scope :by_country, ->(country) { where(ip_api_country: country) }
scope :by_company, ->(company) { where(company: company) }
scope :by_asn, ->(asn) { where(asn: asn) }
# Virtual attribute for setting IP via CIDR notation
attr_accessor :cidr
# Find ranges that contain a specific IPv6 address
def self.contains_ip(ip_string)
ip_addr = IPAddr.new(ip_string)
raise ArgumentError, "Not an IPv6 address" unless ip_addr.ipv6?
ip_bytes = ip_addr.hton
where("? BETWEEN network_start AND network_end", ip_bytes)
.order(network_prefix: :desc) # Most specific first
end
# Check if this range contains a specific IP
def contains_ip?(ip_string)
ip_addr = IPAddr.new(ip_string)
return false unless ip_addr.ipv6?
ip_bytes = ip_addr.hton
ip_bytes >= network_start && ip_bytes <= network_end
end
# Get CIDR notation for this range
def to_cidr
return nil unless network_start.present?
ip_addr = IPAddr.new_ntoh(network_start)
"#{ip_addr}/#{network_prefix}"
end
# String representation
def to_s
to_cidr || "Ipv6Range##{id}"
end
# Convenience methods for JSON fields
def abuser_scores_hash
abuser_scores ? JSON.parse(abuser_scores) : {}
rescue JSON::ParserError
{}
end
def abuser_scores_hash=(hash)
self.abuser_scores = hash.to_json
end
def additional_data_hash
additional_data ? JSON.parse(additional_data) : {}
rescue JSON::ParserError
{}
end
def additional_data_hash=(hash)
self.additional_data = hash.to_json
end
# GeoIP lookup methods
def geo_lookup_country!
return if ip_api_country.present? || geo2_country.present?
# Use the first IP in the range for lookup
sample_ip = IPAddr.new_ntoh(network_start).to_s
country = GeoIpService.lookup_country(sample_ip)
if country.present?
# Update both country fields for redundancy
update!(ip_api_country: country, geo2_country: country)
country
end
rescue => e
Rails.logger.error "Failed to lookup geo location for IPv6 range #{id}: #{e.message}"
nil
end
def geo_lookup_country
return ip_api_country if ip_api_country.present?
return geo2_country if geo2_country.present?
# Use the first IP in the range for lookup
sample_ip = IPAddr.new_ntoh(network_start).to_s
GeoIpService.lookup_country(sample_ip)
rescue => e
Rails.logger.error "Failed to lookup geo location for IPv6 range #{id}: #{e.message}"
nil
end
# Check if this range has any country information
def has_country_info?
ip_api_country.present? || geo2_country.present?
end
# Get the best available country code
def primary_country
ip_api_country || geo2_country
end
# Class method to lookup country for any IP in the range
def self.lookup_country_by_ip(ip_string)
range = contains_ip(ip_string).first
return nil unless range
range.geo_lookup_country
end
# Class method to enrich ranges without country data
def self.enrich_missing_geo_data(limit: 1000)
ranges_without_geo = where(ip_api_country: [nil, ''], geo2_country: [nil, ''])
.limit(limit)
updated_count = 0
geo_service = GeoIpService.new
ranges_without_geo.find_each do |range|
country = geo_service.lookup_country(IPAddr.new_ntoh(range.network_start).to_s)
if country.present?
range.update!(ip_api_country: country, geo2_country: country)
updated_count += 1
end
end
updated_count
end
private
# Calculate network_start and network_end from CIDR notation
def calculate_range
return unless cidr.present?
ip_addr = IPAddr.new(cidr)
raise ArgumentError, "Not an IPv6 CIDR" unless ip_addr.ipv6?
# Get prefix from CIDR
self.network_prefix = cidr.split("/").last.to_i
# Calculate network range (binary format for IPv6)
first_ip = ip_addr.to_range.first
last_ip = ip_addr.to_range.last
self.network_start = first_ip.hton
self.network_end = last_ip.hton
rescue IPAddr::InvalidAddressError => e
errors.add(:cidr, "invalid IPv6 CIDR notation: #{e.message}")
end
end

View File

@@ -1,63 +0,0 @@
class NetworkRange < ApplicationRecord
validates :ip_address, presence: true
validates :network_prefix, presence: true, numericality: {greater_than_or_equal_to: 0, less_than_or_equal_to: 128}
validates :ip_version, presence: true, inclusion: {in: [4, 6]}
# Convenience methods for JSON fields
def abuser_scores_hash
abuser_scores ? JSON.parse(abuser_scores) : {}
end
def abuser_scores_hash=(hash)
self.abuser_scores = hash.to_json
end
def additional_data_hash
additional_data ? JSON.parse(additional_data) : {}
end
def additional_data_hash=(hash)
self.additional_data = hash.to_json
end
# Scope methods for common queries
scope :ipv4, -> { where(ip_version: 4) }
scope :ipv6, -> { where(ip_version: 6) }
scope :datacenter, -> { where(is_datacenter: true) }
scope :proxy, -> { where(is_proxy: true) }
scope :vpn, -> { where(is_vpn: true) }
scope :by_country, ->(country) { where(ip_api_country: country) }
scope :by_company, ->(company) { where(company: company) }
scope :by_asn, ->(asn) { where(asn: asn) }
# Find network ranges that contain a specific IP address
def self.contains_ip(ip_string)
ip_bytes = IPAddr.new(ip_string).hton
version = ip_string.include?(":") ? 6 : 4
where(ip_version: version).select do |range|
range.contains_ip_bytes?(ip_bytes)
end
end
def contains_ip?(ip_string)
contains_ip_bytes?(IPAddr.new(ip_string).hton)
end
def to_s
"#{ip_address_to_s}/#{network_prefix}"
end
private
def contains_ip_bytes?(ip_bytes)
# This is a simplified version - you'll need proper network math here
# For now, just check if the IP matches exactly
ip_address == ip_bytes
end
def ip_address_to_s
# Convert binary IP back to string representation
IPAddr.ntop(ip_address)
end
end

View File

@@ -1,126 +1,189 @@
# frozen_string_literal: true # frozen_string_literal: true
class Rule < ApplicationRecord class Rule < ApplicationRecord
belongs_to :rule_set # Rule types for the new architecture
RULE_TYPES = %w[network_v4 network_v6 rate_limit path_pattern].freeze
ACTIONS = %w[allow deny rate_limit redirect log].freeze
SOURCES = %w[manual auto:scanner_detected auto:rate_limit_exceeded auto:bot_detected imported default].freeze
validates :rule_type, presence: true, inclusion: { in: RuleSet::RULE_TYPES } # Validations
validates :target, presence: true validates :rule_type, presence: true, inclusion: { in: RULE_TYPES }
validates :action, presence: true, inclusion: { in: RuleSet::ACTIONS } validates :action, presence: true, inclusion: { in: ACTIONS }
validates :priority, presence: true, numericality: { greater_than: 0 } validates :conditions, presence: true
validates :enabled, inclusion: { in: [true, false] }
# Custom validations based on rule type
validate :validate_conditions_by_type
validate :validate_metadata_by_action
# Scopes
scope :enabled, -> { where(enabled: true) } scope :enabled, -> { where(enabled: true) }
scope :by_priority, -> { order(priority: :desc, created_at: :desc) } scope :disabled, -> { where(enabled: false) }
scope :expired, -> { where("expires_at < ?", Time.current) } scope :active, -> { enabled.where("expires_at IS NULL OR expires_at > ?", Time.current) }
scope :not_expired, -> { where("expires_at IS NULL OR expires_at > ?", Time.current) } scope :expired, -> { where("expires_at IS NOT NULL AND expires_at <= ?", Time.current) }
scope :by_type, ->(type) { where(rule_type: type) }
scope :network_rules, -> { where(rule_type: ["network_v4", "network_v6"]) }
scope :rate_limit_rules, -> { where(rule_type: "rate_limit") }
scope :path_pattern_rules, -> { where(rule_type: "path_pattern") }
scope :by_source, ->(source) { where(source: source) }
# Sync queries (ordered by updated_at for incremental sync)
scope :since, ->(timestamp) { where("updated_at >= ?", timestamp - 0.5.seconds).order(:updated_at, :id) }
scope :sync_order, -> { order(:updated_at, :id) }
# Callbacks
before_validation :set_defaults
before_save :calculate_priority_from_cidr
# Check if rule is currently active # Check if rule is currently active
def active? def active?
enabled? && (expires_at.nil? || expires_at > Time.current) enabled? && !expired?
end end
# Check if rule matches given request context def expired?
def matches?(context) expires_at.present? && expires_at <= Time.current
return false unless active?
case rule_type
when 'ip'
match_ip_rule?(context)
when 'cidr'
match_cidr_rule?(context)
when 'path'
match_path_rule?(context)
when 'user_agent'
match_user_agent_rule?(context)
when 'parameter'
match_parameter_rule?(context)
when 'method'
match_method_rule?(context)
when 'country'
match_country_rule?(context)
else
false
end
end end
def to_waf_format # Convert to format for agent consumption
def to_agent_format
{ {
id: id, id: id,
type: rule_type, rule_type: rule_type,
target: target,
action: action, action: action,
conditions: conditions || {}, conditions: conditions || {},
priority: priority, priority: priority,
expires_at: expires_at, expires_at: expires_at&.iso8601,
active: active? enabled: enabled,
source: source,
metadata: metadata || {},
created_at: created_at.iso8601,
updated_at: updated_at.iso8601
} }
end end
# Class method to get latest version (for sync cursor)
def self.latest_version
maximum(:updated_at)&.iso8601(6) || Time.current.iso8601(6)
end
# Disable rule (soft delete)
def disable!(reason: nil)
update!(
enabled: false,
metadata: (metadata || {}).merge(
disabled_at: Time.current.iso8601,
disabled_reason: reason
)
)
end
# Enable rule
def enable!
update!(enabled: true)
end
# Check if this is a network rule
def network_rule?
rule_type.in?(%w[network_v4 network_v6])
end
# Get CIDR from conditions (for network rules)
def cidr
conditions&.dig("cidr") if network_rule?
end
# Get prefix length from CIDR
def prefix_length
return nil unless cidr
cidr.split("/").last.to_i
end
private private
def match_ip_rule?(context) def set_defaults
return false unless context[:ip_address] self.enabled = true if enabled.nil?
self.conditions ||= {}
target == context[:ip_address] self.metadata ||= {}
self.source ||= "manual"
end end
def match_cidr_rule?(context) def calculate_priority_from_cidr
return false unless context[:ip_address] # For network rules, priority is the prefix length (more specific = higher priority)
if network_rule? && cidr.present?
self.priority = prefix_length
end
end
def validate_conditions_by_type
case rule_type
when "network_v4", "network_v6"
validate_network_conditions
when "rate_limit"
validate_rate_limit_conditions
when "path_pattern"
validate_path_pattern_conditions
end
end
def validate_network_conditions
cidr_value = conditions&.dig("cidr")
if cidr_value.blank?
errors.add(:conditions, "must include 'cidr' for network rules")
return
end
# Validate CIDR format
begin begin
range = IPAddr.new(target) addr = IPAddr.new(cidr_value)
range.include?(context[:ip_address])
rescue IPAddr::InvalidAddressError # Check IPv4 vs IPv6 matches rule_type
false if rule_type == "network_v4" && !addr.ipv4?
errors.add(:conditions, "cidr must be IPv4 for network_v4 rules")
elsif rule_type == "network_v6" && !addr.ipv6?
errors.add(:conditions, "cidr must be IPv6 for network_v6 rules")
end
rescue IPAddr::InvalidAddressError => e
errors.add(:conditions, "invalid CIDR format: #{e.message}")
end end
end end
def match_path_rule?(context) def validate_rate_limit_conditions
return false unless context[:request_path] scope = conditions&.dig("scope")
cidr_value = conditions&.dig("cidr")
# Support exact match and regex patterns if scope.blank?
if conditions&.dig('regex') == true errors.add(:conditions, "must include 'scope' for rate_limit rules")
Regexp.new(target).match?(context[:request_path]) end
else
context[:request_path].start_with?(target) if cidr_value.blank?
errors.add(:conditions, "must include 'cidr' for rate_limit rules")
end
# Validate metadata has rate limit config
unless metadata&.dig("limit").present? && metadata&.dig("window").present?
errors.add(:metadata, "must include 'limit' and 'window' for rate_limit rules")
end end
end end
def match_user_agent_rule?(context) def validate_path_pattern_conditions
return false unless context[:user_agent] patterns = conditions&.dig("patterns")
# Support exact match and regex patterns if patterns.blank? || !patterns.is_a?(Array)
if conditions&.dig('regex') == true errors.add(:conditions, "must include 'patterns' array for path_pattern rules")
Regexp.new(target, Regexp::IGNORECASE).match?(context[:user_agent])
else
context[:user_agent].downcase.include?(target.downcase)
end end
end end
def match_parameter_rule?(context) def validate_metadata_by_action
return false unless context[:query_params] case action
when "redirect"
param_name = conditions&.dig('parameter_name') || target unless metadata&.dig("redirect_url").present?
param_value = context[:query_params][param_name] errors.add(:metadata, "must include 'redirect_url' for redirect action")
end
return false unless param_value when "rate_limit"
unless metadata&.dig("limit").present? && metadata&.dig("window").present?
# Check if parameter value matches pattern errors.add(:metadata, "must include 'limit' and 'window' for rate_limit action")
if conditions&.dig('regex') == true
Regexp.new(target, Regexp::IGNORECASE).match?(param_value.to_s)
else
param_value.to_s.downcase.include?(target.downcase)
end end
end end
def match_method_rule?(context)
return false unless context[:request_method]
target.upcase == context[:request_method].upcase
end
def match_country_rule?(context)
return false unless context[:country_code]
target.upcase == context[:country_code].upcase
end end
end end

View File

@@ -47,7 +47,7 @@ class EventNormalizer
else :allow else :allow
end end
@event.action = action_enum @event.waf_action = action_enum
end end
def normalize_method def normalize_method

View File

@@ -0,0 +1,174 @@
# frozen_string_literal: true
require 'maxmind/db'
require 'httparty'
require 'digest'
require 'tmpdir'
require 'fileutils'
class GeoIpService
include HTTParty
class DatabaseNotAvailable < StandardError; end
class InvalidIpAddress < StandardError; end
attr_reader :database_reader, :database_path
def initialize(database_path: nil)
@database_path = database_path || default_database_path
@database_reader = nil
load_database if File.exist?(@database_path)
end
# Main lookup method - returns country code for IP address
def lookup_country(ip_address)
return fallback_country unless database_available?
return fallback_country unless valid_ip?(ip_address)
result = database_reader.get(ip_address)
return fallback_country if result.nil? || result.empty?
# Extract country code from MaxMind result
result['country']&.[]('iso_code') || fallback_country
rescue => e
Rails.logger.error "GeoIP lookup failed for #{ip_address}: #{e.message}"
fallback_country
end
# Check if database is available and loaded
def database_available?
return false unless File.exist?(@database_path)
load_database unless database_reader
database_reader.present?
end
# Get database information
def database_info
return nil unless File.exist?(@database_path)
file_stat = File.stat(@database_path)
metadata = database_reader&.metadata
if metadata
{
type: 'GeoLite2-Country',
version: "#{metadata.binary_format_major_version}.#{metadata.binary_format_minor_version}",
size: file_stat.size,
modified_at: file_stat.mtime,
age_days: ((Time.current - file_stat.mtime) / 1.day).round,
file_path: @database_path
}
else
{
type: 'GeoLite2-Country',
version: 'Unknown',
size: file_stat.size,
modified_at: file_stat.mtime,
age_days: ((Time.current - file_stat.mtime) / 1.day).round,
file_path: @database_path
}
end
end
# Class method for convenience lookup
def self.lookup_country(ip_address)
new.lookup_country(ip_address)
end
# Update database from remote source
def self.update_database!
new.update_from_remote!
end
# Download and install database from remote URL
def update_from_remote!
config = Rails.application.config.maxmind
database_url = config.database_url
storage_path = config.storage_path
database_filename = config.database_filename
temp_file = nil
Rails.logger.info "Starting GeoIP database download from #{database_url}"
begin
# Ensure storage directory exists
FileUtils.mkdir_p(storage_path) unless Dir.exist?(storage_path)
# Download to temporary file
Dir.mktmpdir do |temp_dir|
temp_file = File.join(temp_dir, database_filename)
response = HTTParty.get(database_url, timeout: 60)
raise "Failed to download database: #{response.code}" unless response.success?
File.binwrite(temp_file, response.body)
# Validate downloaded file
validate_downloaded_file(temp_file)
# Move to final location
final_path = File.join(storage_path, database_filename)
File.rename(temp_file, final_path)
# Reload the database with new file
@database_reader = nil
load_database
Rails.logger.info "GeoIP database successfully updated: #{final_path}"
return true
end
rescue => e
Rails.logger.error "Failed to update GeoIP database: #{e.message}"
File.delete(temp_file) if temp_file && File.exist?(temp_file)
false
end
end
private
def load_database
return unless File.exist?(@database_path)
@database_reader = MaxMind::DB.new(@database_path)
rescue => e
Rails.logger.error "Failed to load GeoIP database: #{e.message}"
@database_reader = nil
end
def default_database_path
config = Rails.application.config.maxmind
File.join(config.storage_path, config.database_filename)
end
def valid_ip?(ip_address)
IPAddr.new(ip_address)
true
rescue IPAddr::InvalidAddressError
false
end
def fallback_country
config = Rails.application.config.maxmind
return nil unless config.enable_fallback
config.fallback_country
end
def cache_size
return 0 unless Rails.application.config.maxmind.cache_enabled
Rails.application.config.maxmind.cache_size
end
def validate_downloaded_file(file_path)
# Basic file existence and size check
raise "Downloaded file is empty" unless File.exist?(file_path)
raise "Downloaded file is too small" if File.size(file_path) < 1_000_000 # ~1MB minimum
# Try to open with MaxMind reader
begin
MaxMind::DB.new(file_path)
rescue => e
raise "Invalid MaxMind database format: #{e.message}"
end
end
end

78
app/services/hub_load.rb Normal file
View File

@@ -0,0 +1,78 @@
# frozen_string_literal: true
# HubLoad - Calculates dynamic event sampling rate based on system load
#
# This service monitors SolidQueue depth and adjusts sampling rates to prevent
# the Hub from being overwhelmed while ensuring critical events are always captured.
class HubLoad
# Queue depth thresholds
THRESHOLDS = {
normal: 0..1_000, # 100% sampling
moderate: 1_001..5_000, # 50% sampling
high: 5_001..10_000, # 20% sampling
critical: 10_001..Float::INFINITY # 5% sampling
}.freeze
SAMPLING_RATES = {
normal: { allowed: 1.0, blocked: 1.0, rate_limited: 1.0 },
moderate: { allowed: 0.5, blocked: 1.0, rate_limited: 1.0 },
high: { allowed: 0.2, blocked: 1.0, rate_limited: 1.0 },
critical: { allowed: 0.05, blocked: 1.0, rate_limited: 1.0 }
}.freeze
# Get current sampling configuration based on load
def self.current_sampling
load_level = calculate_load_level
rates = SAMPLING_RATES[load_level]
{
allowed_requests: rates[:allowed],
blocked_requests: rates[:blocked],
rate_limited_requests: rates[:rate_limited],
effective_until: next_sync_time,
load_level: load_level,
queue_depth: queue_depth
}
end
# Calculate when sampling should be rechecked (next agent sync)
def self.next_sync_time
10.seconds.from_now.iso8601(3)
end
# Get current queue depth
def self.queue_depth
# SolidQueue stores jobs in the jobs table
# Count pending/running jobs only
SolidQueue::Job.where(finished_at: nil).count
rescue StandardError => e
Rails.logger.error "Failed to get queue depth: #{e.message}"
0
end
# Determine load level based on queue depth
def self.calculate_load_level
depth = queue_depth
THRESHOLDS.each do |level, range|
return level if range.cover?(depth)
end
:critical # Fallback
end
# Check if hub is under heavy load
def self.overloaded?
calculate_load_level.in?([:high, :critical])
end
# Get load statistics for monitoring
def self.stats
{
queue_depth: queue_depth,
load_level: calculate_load_level,
sampling_rates: SAMPLING_RATES[calculate_load_level],
overloaded: overloaded?
}
end
end

View File

@@ -32,7 +32,7 @@
</div> </div>
<div class="card-footer"> <div class="card-footer">
<%= link_to "View", project_path(project), class: "btn btn-primary btn-sm" %> <%= link_to "View", project_path(project), class: "btn btn-primary btn-sm" %>
<%= link_to "Events", events_project_path(project), class: "btn btn-secondary btn-sm" %> <%= link_to "Events", project_events_path(project), class: "btn btn-secondary btn-sm" %>
<%= link_to "Analytics", analytics_project_path(project), class: "btn btn-info btn-sm" %> <%= link_to "Analytics", analytics_project_path(project), class: "btn btn-info btn-sm" %>
</div> </div>
</div> </div>

View File

@@ -0,0 +1,31 @@
# frozen_string_literal: true
require 'fileutils'
# MaxMind GeoIP Configuration
Rails.application.configure do
config.maxmind = ActiveSupport::OrderedOptions.new
# Database configuration
config.maxmind.database_url = ENV.fetch('MAXMIND_DATABASE_URL', 'https://github.com/P3TERX/GeoLite.mmdb/raw/download/GeoLite2-Country.mmdb')
config.maxmind.database_type = 'GeoLite2-Country'
# Local storage paths
config.maxmind.storage_path = Rails.root.join('db', 'geoip')
config.maxmind.database_filename = 'GeoLite2-Country.mmdb'
# Update configuration
config.maxmind.auto_update_enabled = ENV.fetch('MAXMIND_AUTO_UPDATE', 'true').downcase == 'true'
config.maxmind.update_interval_days = ENV.fetch('MAXMIND_UPDATE_INTERVAL_DAYS', 7).to_i
config.maxmind.max_age_days = ENV.fetch('MAXMIND_MAX_AGE_DAYS', 30).to_i
# Note: MaxMind DB has its own internal caching, no additional caching needed
# Fallback settings
config.maxmind.fallback_country = ENV.fetch('MAXMIND_FALLBACK_COUNTRY', nil)
config.maxmind.enable_fallback = ENV.fetch('MAXMIND_ENABLE_FALLBACK', 'false').downcase == 'true'
end
# Ensure storage directory exists
maxmind_storage_path = Rails.application.config.maxmind.storage_path
FileUtils.mkdir_p(maxmind_storage_path) unless Dir.exist?(maxmind_storage_path)

View File

@@ -5,9 +5,14 @@ Rails.application.routes.draw do
# Can be used by load balancers and uptime monitors to verify that the app is live. # Can be used by load balancers and uptime monitors to verify that the app is live.
get "up" => "rails/health#show", as: :rails_health_check get "up" => "rails/health#show", as: :rails_health_check
# WAF Event Ingestion API # WAF API
namespace :api, defaults: { format: :json } do namespace :api, defaults: { format: :json } do
# Event ingestion
post ":project_id/events", to: "events#create" post ":project_id/events", to: "events#create"
# Rule synchronization
get ":public_key/rules/version", to: "rules#version"
get ":public_key/rules", to: "rules#index"
end end
# Root path - projects dashboard # Root path - projects dashboard

View File

@@ -0,0 +1,56 @@
class EnhanceRulesTableForSync < ActiveRecord::Migration[8.1]
def change
# Remove rule_sets relationship (we're skipping rule sets for Phase 1)
if foreign_key_exists?(:rules, :rule_sets)
remove_foreign_key :rules, :rule_sets
end
if column_exists?(:rules, :rule_set_id)
remove_column :rules, :rule_set_id
end
change_table :rules do |t|
# Add source field to track rule origin
unless column_exists?(:rules, :source)
t.string :source, limit: 100
end
# Ensure core fields exist with proper types
unless column_exists?(:rules, :rule_type)
t.string :rule_type, null: false
end
unless column_exists?(:rules, :action)
t.string :action, null: false
end
unless column_exists?(:rules, :conditions)
t.json :conditions, null: false, default: {}
end
unless column_exists?(:rules, :metadata)
t.json :metadata, default: {}
end
unless column_exists?(:rules, :priority)
t.integer :priority
end
unless column_exists?(:rules, :expires_at)
t.datetime :expires_at
end
unless column_exists?(:rules, :enabled)
t.boolean :enabled, default: true, null: false
end
end
# Add indexes for efficient sync queries
add_index :rules, [:updated_at, :id], if_not_exists: true, name: "idx_rules_sync"
add_index :rules, :enabled, if_not_exists: true
add_index :rules, :expires_at, if_not_exists: true
add_index :rules, :source, if_not_exists: true
add_index :rules, :rule_type, if_not_exists: true
add_index :rules, [:rule_type, :enabled], if_not_exists: true, name: "idx_rules_type_enabled"
end
end

View File

@@ -0,0 +1,70 @@
class SplitNetworkRangesIntoIpv4AndIpv6 < ActiveRecord::Migration[8.1]
def change
# Drop the old network_ranges table (no data to preserve)
drop_table :network_ranges, if_exists: true
# Create optimized IPv4 ranges table
create_table :ipv4_ranges do |t|
# Range fields for fast lookups
t.integer :network_start, limit: 8, null: false
t.integer :network_end, limit: 8, null: false
t.integer :network_prefix, null: false
# IP intelligence metadata
t.string :company
t.integer :asn
t.string :asn_org
t.boolean :is_datacenter, default: false
t.boolean :is_proxy, default: false
t.boolean :is_vpn, default: false
t.string :ip_api_country
t.string :geo2_country
t.text :abuser_scores
t.text :additional_data
t.timestamp :last_api_fetch
t.timestamps
end
# Optimized indexes for IPv4
add_index :ipv4_ranges, [:network_start, :network_end, :network_prefix],
name: "idx_ipv4_range_lookup"
add_index :ipv4_ranges, :asn, name: "idx_ipv4_asn"
add_index :ipv4_ranges, :company, name: "idx_ipv4_company"
add_index :ipv4_ranges, :ip_api_country, name: "idx_ipv4_country"
add_index :ipv4_ranges, [:is_datacenter, :is_proxy, :is_vpn],
name: "idx_ipv4_flags"
# Create optimized IPv6 ranges table
create_table :ipv6_ranges do |t|
# Range fields for fast lookups (binary for 128-bit addresses)
t.binary :network_start, limit: 16, null: false
t.binary :network_end, limit: 16, null: false
t.integer :network_prefix, null: false
# IP intelligence metadata (same as IPv4)
t.string :company
t.integer :asn
t.string :asn_org
t.boolean :is_datacenter, default: false
t.boolean :is_proxy, default: false
t.boolean :is_vpn, default: false
t.string :ip_api_country
t.string :geo2_country
t.text :abuser_scores
t.text :additional_data
t.timestamp :last_api_fetch
t.timestamps
end
# Optimized indexes for IPv6
add_index :ipv6_ranges, [:network_start, :network_end, :network_prefix],
name: "idx_ipv6_range_lookup"
add_index :ipv6_ranges, :asn, name: "idx_ipv6_asn"
add_index :ipv6_ranges, :company, name: "idx_ipv6_company"
add_index :ipv6_ranges, :ip_api_country, name: "idx_ipv6_country"
add_index :ipv6_ranges, [:is_datacenter, :is_proxy, :is_vpn],
name: "idx_ipv6_flags"
end
end

View File

@@ -0,0 +1,16 @@
class CreateGeoIpDatabases < ActiveRecord::Migration[8.1]
def change
create_table :geo_ip_databases do |t|
t.string :database_type
t.string :version
t.string :file_path
t.integer :file_size
t.string :checksum_md5
t.datetime :downloaded_at
t.datetime :last_checked_at
t.boolean :is_active
t.timestamps
end
end
end

View File

@@ -0,0 +1,25 @@
# frozen_string_literal: true
class DropGeoIpDatabasesTable < ActiveRecord::Migration[8.1]
def up
drop_table :geo_ip_databases
end
def down
create_table :geo_ip_databases do |t|
t.string :database_type, null: false
t.string :version, null: false
t.string :file_path, null: false
t.integer :file_size, null: false
t.string :checksum_md5, null: false
t.datetime :downloaded_at, null: false
t.datetime :last_checked_at
t.boolean :is_active, default: true
t.timestamps
end
add_index :geo_ip_databases, :is_active
add_index :geo_ip_databases, :database_type
add_index :geo_ip_databases, :file_path, unique: true
end
end

View File

@@ -0,0 +1,74 @@
class ChangeRequestMethodToIntegerInEvents < ActiveRecord::Migration[8.1]
def change
# Convert enum columns from string to integer for proper enum support
reversible do |dir|
dir.up do
# Map request_method string values to enum integers
execute <<-SQL
UPDATE events
SET request_method = CASE
WHEN LOWER(request_method) = 'get' THEN '0'
WHEN LOWER(request_method) = 'post' THEN '1'
WHEN LOWER(request_method) = 'put' THEN '2'
WHEN LOWER(request_method) = 'patch' THEN '3'
WHEN LOWER(request_method) = 'delete' THEN '4'
WHEN LOWER(request_method) = 'head' THEN '5'
WHEN LOWER(request_method) = 'options' THEN '6'
ELSE '0' -- Default to GET for unknown values
END
WHERE request_method IS NOT NULL;
SQL
# Map waf_action string values to enum integers
execute <<-SQL
UPDATE events
SET waf_action = CASE
WHEN LOWER(waf_action) = 'allow' THEN '0'
WHEN LOWER(waf_action) IN ('deny', 'block') THEN '1'
WHEN LOWER(waf_action) = 'redirect' THEN '2'
WHEN LOWER(waf_action) = 'challenge' THEN '3'
ELSE '0' -- Default to allow for unknown values
END
WHERE waf_action IS NOT NULL;
SQL
# Change column types to integer
change_column :events, :request_method, :integer
change_column :events, :waf_action, :integer
end
dir.down do
# Convert back to string values
change_column :events, :request_method, :string
change_column :events, :waf_action, :string
execute <<-SQL
UPDATE events
SET request_method = CASE request_method
WHEN 0 THEN 'get'
WHEN 1 THEN 'post'
WHEN 2 THEN 'put'
WHEN 3 THEN 'patch'
WHEN 4 THEN 'delete'
WHEN 5 THEN 'head'
WHEN 6 THEN 'options'
ELSE 'get' -- Default to GET for unknown values
END
WHERE request_method IS NOT NULL;
SQL
execute <<-SQL
UPDATE events
SET waf_action = CASE waf_action
WHEN 0 THEN 'allow'
WHEN 1 THEN 'deny'
WHEN 2 THEN 'redirect'
WHEN 3 THEN 'challenge'
ELSE 'allow' -- Default to allow for unknown values
END
WHERE waf_action IS NOT NULL;
SQL
end
end
end
end

View File

@@ -10,7 +10,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[8.1].define(version: 2025_11_03_035249) do ActiveRecord::Schema[8.1].define(version: 2025_11_03_130430) do
create_table "events", force: :cascade do |t| create_table "events", force: :cascade do |t|
t.string "agent_name" t.string "agent_name"
t.string "agent_version" t.string "agent_version"
@@ -24,7 +24,7 @@ ActiveRecord::Schema[8.1].define(version: 2025_11_03_035249) do
t.json "payload" t.json "payload"
t.integer "project_id", null: false t.integer "project_id", null: false
t.integer "request_host_id" t.integer "request_host_id"
t.string "request_method" t.integer "request_method"
t.string "request_path" t.string "request_path"
t.string "request_protocol" t.string "request_protocol"
t.string "request_segment_ids" t.string "request_segment_ids"
@@ -36,7 +36,7 @@ ActiveRecord::Schema[8.1].define(version: 2025_11_03_035249) do
t.datetime "timestamp", null: false t.datetime "timestamp", null: false
t.datetime "updated_at", null: false t.datetime "updated_at", null: false
t.text "user_agent" t.text "user_agent"
t.string "waf_action" t.integer "waf_action"
t.index ["event_id"], name: "index_events_on_event_id", unique: true t.index ["event_id"], name: "index_events_on_event_id", unique: true
t.index ["ip_address"], name: "index_events_on_ip_address" t.index ["ip_address"], name: "index_events_on_ip_address"
t.index ["project_id", "ip_address"], name: "index_events_on_project_id_and_ip_address" t.index ["project_id", "ip_address"], name: "index_events_on_project_id_and_ip_address"
@@ -50,7 +50,7 @@ ActiveRecord::Schema[8.1].define(version: 2025_11_03_035249) do
t.index ["waf_action"], name: "index_events_on_waf_action" t.index ["waf_action"], name: "index_events_on_waf_action"
end end
create_table "network_ranges", force: :cascade do |t| create_table "ipv4_ranges", force: :cascade do |t|
t.text "abuser_scores" t.text "abuser_scores"
t.text "additional_data" t.text "additional_data"
t.integer "asn" t.integer "asn"
@@ -58,21 +58,44 @@ ActiveRecord::Schema[8.1].define(version: 2025_11_03_035249) do
t.string "company" t.string "company"
t.datetime "created_at", null: false t.datetime "created_at", null: false
t.string "geo2_country" t.string "geo2_country"
t.binary "ip_address", null: false
t.string "ip_api_country" t.string "ip_api_country"
t.integer "ip_version", null: false
t.boolean "is_datacenter", default: false t.boolean "is_datacenter", default: false
t.boolean "is_proxy", default: false t.boolean "is_proxy", default: false
t.boolean "is_vpn", default: false t.boolean "is_vpn", default: false
t.datetime "last_api_fetch" t.datetime "last_api_fetch"
t.integer "network_end", limit: 8, null: false
t.integer "network_prefix", null: false t.integer "network_prefix", null: false
t.integer "network_start", limit: 8, null: false
t.datetime "updated_at", null: false t.datetime "updated_at", null: false
t.index ["asn"], name: "idx_network_ranges_asn" t.index ["asn"], name: "idx_ipv4_asn"
t.index ["company"], name: "idx_network_ranges_company" t.index ["company"], name: "idx_ipv4_company"
t.index ["ip_address", "network_prefix"], name: "idx_network_ranges_ip_range" t.index ["ip_api_country"], name: "idx_ipv4_country"
t.index ["ip_api_country"], name: "idx_network_ranges_country" t.index ["is_datacenter", "is_proxy", "is_vpn"], name: "idx_ipv4_flags"
t.index ["ip_version"], name: "idx_network_ranges_version" t.index ["network_start", "network_end", "network_prefix"], name: "idx_ipv4_range_lookup"
t.index ["is_datacenter", "is_proxy", "is_vpn"], name: "idx_network_ranges_flags" end
create_table "ipv6_ranges", force: :cascade do |t|
t.text "abuser_scores"
t.text "additional_data"
t.integer "asn"
t.string "asn_org"
t.string "company"
t.datetime "created_at", null: false
t.string "geo2_country"
t.string "ip_api_country"
t.boolean "is_datacenter", default: false
t.boolean "is_proxy", default: false
t.boolean "is_vpn", default: false
t.datetime "last_api_fetch"
t.binary "network_end", limit: 16, null: false
t.integer "network_prefix", null: false
t.binary "network_start", limit: 16, null: false
t.datetime "updated_at", null: false
t.index ["asn"], name: "idx_ipv6_asn"
t.index ["company"], name: "idx_ipv6_company"
t.index ["ip_api_country"], name: "idx_ipv6_country"
t.index ["is_datacenter", "is_proxy", "is_vpn"], name: "idx_ipv6_flags"
t.index ["network_start", "network_end", "network_prefix"], name: "idx_ipv6_range_lookup"
end end
create_table "path_segments", force: :cascade do |t| create_table "path_segments", force: :cascade do |t|
@@ -101,6 +124,13 @@ ActiveRecord::Schema[8.1].define(version: 2025_11_03_035249) do
t.index ["slug"], name: "index_projects_on_slug", unique: true t.index ["slug"], name: "index_projects_on_slug", unique: true
end end
create_table "request_actions", force: :cascade do |t|
t.string "action", null: false
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["action"], name: "index_request_actions_on_action", unique: true
end
create_table "request_hosts", force: :cascade do |t| create_table "request_hosts", force: :cascade do |t|
t.datetime "created_at", null: false t.datetime "created_at", null: false
t.datetime "first_seen_at", null: false t.datetime "first_seen_at", null: false
@@ -148,14 +178,18 @@ ActiveRecord::Schema[8.1].define(version: 2025_11_03_035249) do
t.datetime "expires_at" t.datetime "expires_at"
t.json "metadata" t.json "metadata"
t.integer "priority" t.integer "priority"
t.integer "rule_set_id", null: false
t.string "rule_type" t.string "rule_type"
t.string "source", limit: 100
t.string "target" t.string "target"
t.datetime "updated_at", null: false t.datetime "updated_at", null: false
t.index ["rule_set_id"], name: "index_rules_on_rule_set_id" t.index ["enabled"], name: "index_rules_on_enabled"
t.index ["expires_at"], name: "index_rules_on_expires_at"
t.index ["rule_type", "enabled"], name: "idx_rules_type_enabled"
t.index ["rule_type"], name: "index_rules_on_rule_type"
t.index ["source"], name: "index_rules_on_source"
t.index ["updated_at", "id"], name: "idx_rules_sync"
end end
add_foreign_key "events", "projects" add_foreign_key "events", "projects"
add_foreign_key "events", "request_hosts" add_foreign_key "events", "request_hosts"
add_foreign_key "rules", "rule_sets"
end end

358
docs/maxmind.md Normal file
View File

@@ -0,0 +1,358 @@
# MaxMind GeoIP Integration
This document describes the MaxMind GeoIP integration implemented in the Baffle Hub WAF analytics system.
## Overview
The Baffle Hub application uses MaxMind's free GeoLite2-Country database to provide geographic location information for IP addresses. The system automatically enriches WAF events with country codes and provides manual lookup capabilities for both IPv4 and IPv6 addresses.
## Features
- **On-demand lookup** - Country code lookup by IP address
- **Automatic enrichment** - Events are enriched with geo-location data during processing
- **Manual lookup capability** - Rake tasks and model methods for manual lookups
- **GeoLite2-Country database** - Uses MaxMind's free country-level database
- **Automatic updates** - Weekly background job updates the database
- **IPv4/IPv6 support** - Full protocol support for both IP versions
- **Performance optimized** - Database caching and efficient lookups
- **Graceful degradation** - Fallback handling when database is unavailable
## Architecture
### Core Components
#### 1. GeoIpService
- Central service for all IP geolocation operations
- Handles database loading from file system
- Provides batch lookup capabilities
- Manages database updates from MaxMind CDN
- Uses MaxMind's built-in metadata for version information
#### 2. UpdateGeoIpDatabaseJob
- Background job for automatic database updates
- Runs weekly to keep the database current
- Simple file-based validation and updates
#### 3. Enhanced Models
- **Event Model** - Automatic geo-location enrichment for WAF events
- **IPv4Range/IPv6Range Models** - Manual lookup methods for IP ranges
#### 4. File-System Management
- Database stored as single file: `db/geoip/GeoLite2-Country.mmdb`
- Version information queried directly from MaxMind database metadata
- No database tables needed - simplified approach
## Installation & Setup
### Dependencies
The integration uses the following gems:
- `maxmind-db` - Official MaxMind database reader (with built-in caching)
- `httparty` - HTTP client for database downloads
### Database Storage
- Location: `db/geoip/GeoLite2-Country.mmdb`
- Automatic creation of storage directory
- File validation and integrity checking
- Version information queried directly from database metadata
- No additional caching needed - MaxMind DB has its own internal caching
### Initial Setup
```bash
# Install dependencies
bundle install
# Download the GeoIP database
rails geoip:update
# Verify installation
rails geoip:status
```
## Configuration
The system is configurable via environment variables or application configuration:
| Variable | Default | Description |
|----------|---------|-------------|
| `MAXMIND_DATABASE_URL` | MaxMind CDN URL | Database download URL |
| `MAXMIND_AUTO_UPDATE` | `true` | Enable automatic weekly updates |
| `MAXMIND_UPDATE_INTERVAL_DAYS` | `7` | Days between update checks |
| `MAXMIND_MAX_AGE_DAYS` | `30` | Maximum database age before forced update |
| Note: MaxMind DB has built-in caching, no additional caching needed |
| `MAXMIND_FALLBACK_COUNTRY` | `nil` | Fallback country when lookup fails |
| `MAXMIND_ENABLE_FALLBACK` | `false` | Enable fallback country usage |
### Example Configuration
```bash
# config/application.rb or .env file
MAXMIND_AUTO_UPDATE=true
MAXMIND_UPDATE_INTERVAL_DAYS=7
MAXMIND_MAX_AGE_DAYS=30
MAXMIND_FALLBACK_COUNTRY=US
MAXMIND_ENABLE_FALLBACK=true
# Note: No caching configuration needed - MaxMind has built-in caching
```
## Usage
### Rake Tasks
#### Database Management
```bash
# Download/update the GeoIP database
rails geoip:update
# Check database status and configuration
rails geoip:status
# Test the implementation with sample IPs
rails geoip:test
# Manual lookup for a specific IP
rails geoip:lookup[8.8.8.8]
rails geoip:lookup[2001:4860:4860::8888]
```
#### Data Management
```bash
# Enrich existing events missing country codes
rails geoip:enrich_missing
# Clean up old inactive database records
rails geoip:cleanup
```
### Ruby API
#### Service-Level Lookups
```ruby
# Direct country lookup
country = GeoIpService.lookup_country('8.8.8.8')
# => "US"
# Batch lookup
countries = GeoIpService.new.lookup_countries(['8.8.8.8', '1.1.1.1'])
# => { "8.8.8.8" => "US", "1.1.1.1" => nil }
# Check database availability
service = GeoIpService.new
service.database_available? # => true/false
service.database_info # => Database metadata
```
#### Event Model Integration
```ruby
# Automatic enrichment during event processing
event = Event.find(123)
event.enrich_geo_location! # Updates event with country code
event.lookup_country # => "US" (with fallback to service)
event.has_geo_data? # => true/false
event.geo_location # => { country_code: "US", city: nil, ... }
# Batch enrichment of existing events
updated_count = Event.enrich_geo_location_batch
puts "Enriched #{updated_count} events with geo data"
```
#### IP Range Model Integration
```ruby
# IPv4 Range lookups
range = Ipv4Range.find(123)
range.geo_lookup_country! # Updates range with country code
range.geo_lookup_country # => "US" (without updating)
range.has_country_info? # => true/false
range.primary_country # => "US" (best available country)
# Class methods
country = Ipv4Range.lookup_country_by_ip('8.8.8.8')
updated_count = Ipv4Range.enrich_missing_geo_data(limit: 1000)
# IPv6 Range lookups (same interface)
country = Ipv6Range.lookup_country_by_ip('2001:4860:4860::8888')
updated_count = Ipv6Range.enrich_missing_geo_data(limit: 1000)
```
### Background Processing
#### Automatic Updates
The system automatically schedules database updates:
```ruby
# Manually trigger an update (usually scheduled automatically)
UpdateGeoIpDatabaseJob.perform_later
# Force update regardless of age
UpdateGeoIpDatabaseJob.perform_later(force_update: true)
```
#### Event Processing Integration
Geo-location enrichment is automatically included in WAF event processing:
```ruby
# This is called automatically in ProcessWafEventJob
event = Event.create_from_waf_payload!(event_id, payload, project)
event.enrich_geo_location! if event.ip_address.present? && event.country_code.blank?
```
## Database Information
### GeoLite2-Country Database
- **Source**: MaxMind GeoLite2-Country (free version)
- **Update Frequency**: Weekly (Tuesdays)
- **Size**: ~9.5 MB
- **Coverage**: Global IP-to-country mapping
- **Format**: MaxMind DB (.mmdb)
### Database Fields
- `country.iso_code` - Two-letter ISO country code
- Supports both IPv4 and IPv6 addresses
- Includes anonymous/proxy detection metadata
## Performance Considerations
### Performance
- MaxMind DB has built-in internal caching optimized for lookups
- Typical lookup time: <1ms
- Database size optimized for fast lookups
- No additional caching layer needed
### Lookup Performance
- Typical lookup time: <1ms
- Database size optimized for fast lookups
- Efficient range queries for IP networks
### Memory Usage
- Database loaded into memory for fast access
- Approximate memory usage: 15-20 MB for the country database
- Automatic cleanup of old database files
## Error Handling
### Graceful Degradation
- Service returns `nil` when database unavailable
- Logging at appropriate levels for different error types
- Event processing continues even if geo-location fails
### Common Error Scenarios
1. **Database Missing** - Automatic download triggered
2. **Database Corrupted** - Automatic re-download attempted
3. **Network Issues** - Graceful fallback with error logging
4. **Invalid IP Address** - Returns `nil` with warning log
## Troubleshooting
### Check System Status
```bash
# Verify database status
rails geoip:status
# Test with known IPs
rails geoip:test
# Check logs for errors
tail -f log/production.log | grep GeoIP
```
### Common Issues
#### Database Not Available
```bash
# Force database update
rails geoip:update
# Check file permissions
ls -la db/geoip/
```
#### Lookup Failures
```bash
# Test specific IPs
rails geoip:lookup[8.8.8.8]
# Check database validity
rails runner "puts GeoIpService.new.database_available?"
```
#### Performance Issues
- Increase cache size in configuration
- Check memory usage on deployment server
- Monitor lookup times with application metrics
## Monitoring & Maintenance
### Health Checks
```ruby
# Rails console health check
service = GeoIpService.new
puts "Database available: #{service.database_available?}"
puts "Database age: #{service.database_record&.age_in_days} days"
```
### Scheduled Maintenance
- Database automatically updated weekly
- Old database files cleaned up after 7 days
- No manual maintenance required
### Monitoring Metrics
Consider monitoring:
- Database update success/failure rates
- Lookup performance (response times)
- Database age and freshness
- Cache hit/miss ratios
## Security & Privacy
### Data Privacy
- No personal data stored in the GeoIP database
- Only country-level information provided
- No tracking or logging of IP lookups by default
### Network Security
- Database downloaded from official MaxMind CDN
- File integrity validated with MD5 checksums
- Secure temporary file handling during updates
## API Reference
### GeoIpService
#### Class Methods
- `lookup_country(ip_address)` - Direct lookup
- `update_database!` - Force database update
#### Instance Methods
- `lookup_country(ip_address)` - Country lookup
- `lookup_countries(ip_addresses)` - Batch lookup
- `database_available?` - Check database status
- `database_info` - Get database metadata
- `update_from_remote!` - Download new database
### Model Methods
#### Event Model
- `enrich_geo_location!` - Update with country code
- `lookup_country` - Get country code (with fallback)
- `has_geo_data?` - Check if geo data exists
- `geo_location` - Get full geo location hash
#### IPv4Range/IPv6Range Models
- `geo_lookup_country!` - Update range with country code
- `geo_lookup_country` - Get country code (without update)
- `has_country_info?` - Check for existing country data
- `primary_country` - Get best available country code
- `lookup_country_by_ip(ip)` - Class method for IP lookup
- `enrich_missing_geo_data(limit:)` - Class method for batch enrichment
## Support & Resources
### MaxMind Documentation
- [MaxMind Developer Site](https://dev.maxmind.com/)
- [GeoLite2 Databases](https://dev.maxmind.com/geoip/geolite2-free-geolocation-data)
- [Database Accuracy](https://dev.maxmind.com/geoip/geolite2-free-geolocation-data#accuracy)
### Ruby Libraries
- [maxmind-db gem](https://github.com/maxmind/MaxMind-DB-Reader-ruby)
- [httparty gem](https://github.com/jnunemaker/httparty)
### Troubleshooting Resources
- Application logs: `log/production.log`
- Rails console for manual testing
- Database status via `rails geoip:status`

625
docs/rule-architecture.md Normal file
View File

@@ -0,0 +1,625 @@
# Baffle Hub - Rule Architecture
## Overview
Baffle Hub uses a distributed rule system where the Hub generates and manages rules, and Agents download and enforce them locally using optimized SQLite queries. This architecture provides sub-millisecond rule evaluation while maintaining centralized intelligence and control.
## Core Principles
1. **Hub-side Intelligence**: Pattern detection and rule generation happens on the Hub
2. **Agent-side Enforcement**: Rule evaluation happens locally on Agents for speed
3. **Incremental Sync**: Agents poll for rule updates using timestamp-based cursors
4. **Dynamic Backpressure**: Hub controls event sampling based on load
5. **Temporal Rules**: Rules can expire automatically (e.g., 24-hour bans)
6. **Soft Deletes**: Rules are disabled, not deleted, for proper sync and audit trail
## Rule Types
### 1. Network Rules (`network_v4`, `network_v6`)
Block or allow traffic based on IP address or CIDR ranges.
**Use Cases**:
- Block scanner IPs (temporary or permanent)
- Block datacenter/VPN/proxy ranges
- Allow trusted IP ranges
- Geographic blocking via IP ranges
**Evaluation**:
- **Most specific CIDR wins** (smallest prefix)
- `/32` beats `/24` beats `/16` beats `/8`
- Agent uses optimized range queries on `ipv4_ranges`/`ipv6_ranges` tables
**Example**:
```json
{
"id": 12341,
"rule_type": "network_v4",
"action": "deny",
"conditions": { "cidr": "185.220.100.0/22" },
"priority": 22,
"expires_at": "2024-11-04T12:00:00Z",
"enabled": true,
"source": "auto:scanner_detected",
"metadata": {
"reason": "Tor exit node hitting /.env",
"auto_generated": true
}
}
```
### 2. Rate Limit Rules (`rate_limit`)
Control request rate per IP or per CIDR range.
**Scopes** (Phase 1):
- **Global per-IP**: Limit requests per IP across all paths
- **Per-CIDR**: Different limits for different network ranges
**Scopes** (Phase 2+):
- **Per-path per-IP**: Different limits for `/api/*`, `/login`, etc.
**Evaluation**:
- Agent maintains in-memory counters per IP
- Finds most specific CIDR rule for the IP
- Applies that rule's rate limit configuration
- Optional: Persist counters to SQLite for restart resilience
**Example (Phase 1)**:
```json
{
"id": 12342,
"rule_type": "rate_limit",
"action": "rate_limit",
"conditions": {
"cidr": "0.0.0.0/0",
"scope": "global"
},
"priority": 0,
"enabled": true,
"source": "manual",
"metadata": {
"limit": 100,
"window": 60,
"per_ip": true
}
}
```
**Example (Phase 2+)**:
```json
{
"id": 12343,
"rule_type": "rate_limit",
"action": "rate_limit",
"conditions": {
"cidr": "0.0.0.0/0",
"scope": "per_path",
"path_pattern": "/api/login"
},
"metadata": {
"limit": 5,
"window": 60,
"per_ip": true
}
}
```
### 3. Path Pattern Rules (`path_pattern`)
Detect suspicious path access patterns (mainly for Hub analytics).
**Use Cases**:
- Detect scanners hitting `/.env`, `/.git`, `/wp-admin`
- Identify bots with suspicious path traversal
- Trigger automatic IP bans when patterns match
**Evaluation**:
- Agent does lightweight pattern matching
- When matched, sends event to Hub with `matched_pattern: true`
- Hub analyzes and creates IP block rules if needed
- Agent picks up new IP block rule in next sync (~10s)
**Example**:
```json
{
"id": 12344,
"rule_type": "path_pattern",
"action": "log",
"conditions": {
"patterns": ["/.env", "/.git/*", "/wp-admin/*", "/.aws/*", "/phpMyAdmin/*"]
},
"enabled": true,
"source": "default:scanner_detection",
"metadata": {
"auto_ban_ip": true,
"ban_duration_hours": 24,
"description": "Common scanner paths"
}
}
```
## Rule Actions
| Action | Description | HTTP Response |
|--------|-------------|---------------|
| `allow` | Pass request through | Continue to app |
| `deny` | Block request | 403 Forbidden |
| `rate_limit` | Enforce rate limit | 429 Too Many Requests |
| `redirect` | Redirect to URL | 301/302 + Location header |
| `challenge` | Show CAPTCHA (Phase 2+) | 403 with challenge |
| `log` | Log only, don't block | Continue to app |
## Rule Priority & Specificity
### Network Rules
- **Priority is determined by CIDR prefix length**
- Smaller prefix (more specific) = higher priority
- `/32` (single IP) beats `/24` (256 IPs) beats `/8` (16M IPs)
- Example: Block `10.0.0.0/8` but allow `10.0.1.0/24`
- Request from `10.0.1.5` → matches `/24` → allowed
- Request from `10.0.2.5` → matches `/8` only → blocked
### Rate Limit Rules
- Most specific CIDR match wins
- Per-path rules take precedence over global (Phase 2+)
### Path Pattern Rules
- All patterns are evaluated (not exclusive)
- Used for detection, not blocking
- Multiple pattern matches = stronger signal for ban
## Rule Synchronization
### Timestamp-Based Cursor
Agents use `updated_at` timestamps as sync cursors to handle rule updates and deletions.
**Why `updated_at` instead of `id`?**
- Handles rule updates (e.g., disabling a rule updates `updated_at`)
- Handles rule deletions via `enabled=false` flag
- Simple for agents: "give me everything that changed since X"
**Agent Sync Flow**:
```
1. Agent starts: last_sync = nil
2. GET /api/:key/rules → Full sync, store latest updated_at
3. Every 10s or 1000 events: GET /api/:key/rules?since=<last_sync>
4. Process rules: add new, update existing, remove disabled
5. Update last_sync to latest updated_at from response
```
**Query Overlap**: Hub queries `updated_at >= since - 0.5s` to handle clock skew and millisecond duplicates.
### API Endpoints
#### 1. Version Check (Lightweight)
```http
GET /api/:public_key/rules/version
Response:
{
"version": "2024-11-03T12:30:45.123Z",
"count": 150,
"sampling": {
"allowed_requests": 0.5,
"blocked_requests": 1.0,
"rate_limited_requests": 1.0,
"effective_until": "2024-11-03T12:30:55.123Z"
}
}
```
#### 2. Incremental Sync
```http
GET /api/:public_key/rules?since=2024-11-03T12:00:00.000Z
Response:
{
"version": "2024-11-03T12:30:45.123Z",
"sampling": { ... },
"rules": [
{
"id": 12341,
"rule_type": "network_v4",
"action": "deny",
"conditions": { "cidr": "1.2.3.4/32" },
"priority": 32,
"expires_at": "2024-11-04T12:00:00Z",
"enabled": true,
"source": "auto:scanner_detected",
"metadata": { "reason": "Hitting /.env" },
"created_at": "2024-11-03T12:00:00Z",
"updated_at": "2024-11-03T12:00:00Z"
},
{
"id": 12340,
"rule_type": "network_v4",
"action": "deny",
"conditions": { "cidr": "5.6.7.8/32" },
"priority": 32,
"enabled": false,
"source": "manual",
"metadata": { "reason": "False positive" },
"created_at": "2024-11-02T10:00:00Z",
"updated_at": "2024-11-03T12:25:00Z"
}
]
}
```
#### 3. Full Sync
```http
GET /api/:public_key/rules
Response:
{
"version": "2024-11-03T12:30:45.123Z",
"sampling": { ... },
"rules": [ ...all enabled rules... ]
}
```
## Dynamic Event Sampling
Hub controls how many events Agents send based on load.
### Sampling Strategy
**Hub monitors**:
- SolidQueue job depth
- Events/second rate
- Database write latency
**Sampling rates**:
```ruby
Queue Depth | Allowed | Blocked | Rate Limited
----------------|---------|---------|-------------
0-1,000 | 100% | 100% | 100%
1,001-5,000 | 50% | 100% | 100%
5,001-10,000 | 20% | 100% | 100%
10,001+ | 5% | 100% | 100%
```
**Phase 2+: Path-based sampling**:
```json
{
"sampling": {
"allowed_requests": 0.1,
"blocked_requests": 1.0,
"paths": {
"block": ["/.env", "/.git/*"],
"allow": ["/health", "/metrics"]
}
}
}
```
**Agent respects sampling**:
- Always sends blocked/rate-limited events
- Samples allowed events based on rate
- Can prioritize suspicious paths over routine traffic
## Temporal Rules (Expiration)
Rules can have an `expires_at` timestamp for automatic expiration.
**Use Cases**:
- 24-hour scanner bans
- Temporary rate limit adjustments
- Time-boxed maintenance blocks
**Cleanup**:
- `ExpiredRulesCleanupJob` runs hourly
- Disables rules where `expires_at < now`
- Agent picks up disabled rules in next sync
**Example**:
```ruby
# Hub auto-generates rule when scanner detected:
Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "1.2.3.4/32" },
expires_at: 24.hours.from_now,
source: "auto:scanner_detected",
metadata: { reason: "Hit /.env 5 times in 10 seconds" }
)
# 24 hours later: ExpiredRulesCleanupJob disables it
# Agent syncs and removes from ipv4_ranges table
```
## Rule Sources
The `source` field tracks rule origin for audit and filtering.
**Source Formats**:
- `manual` - Created by user via UI
- `auto:scanner_detected` - Auto-generated from scanner pattern
- `auto:rate_limit_exceeded` - Auto-generated from rate limit abuse
- `auto:bot_detected` - Auto-generated from bot behavior
- `imported:fail2ban` - Imported from external source
- `imported:crowdsec` - Imported from CrowdSec
- `default:scanner_paths` - Default rule set
## Database Schema
### Hub Schema
```ruby
create_table "rules" do |t|
# Identification
t.integer :id, primary_key: true
t.string :source, limit: 100
# Rule definition
t.string :rule_type, null: false
t.string :action, null: false
t.json :conditions, null: false
t.json :metadata
# Priority & lifecycle
t.integer :priority
t.datetime :expires_at
t.boolean :enabled, default: true, null: false
# Timestamps (updated_at is sync cursor!)
t.timestamps
# Indexes
t.index [:updated_at, :id] # Primary sync query
t.index :enabled
t.index :expires_at
t.index :source
t.index :rule_type
end
```
### Agent Schema (Existing)
```ruby
create_table "ipv4_ranges" do |t|
t.integer :network_start, limit: 8, null: false
t.integer :network_end, limit: 8, null: false
t.integer :network_prefix, null: false
t.integer :waf_action, default: 0, null: false
t.integer :priority, default: 100
t.string :redirect_url, limit: 500
t.integer :redirect_status
t.string :source, limit: 50
t.timestamps
t.index [:network_start, :network_end, :network_prefix]
t.index :waf_action
end
create_table "ipv6_ranges" do |t|
t.binary :network_start, limit: 16, null: false
t.binary :network_end, limit: 16, null: false
t.integer :network_prefix, null: false
t.integer :waf_action, default: 0, null: false
t.integer :priority, default: 100
t.string :redirect_url, limit: 500
t.integer :redirect_status
t.string :source, limit: 50
t.timestamps
t.index [:network_start, :network_end, :network_prefix]
t.index :waf_action
end
```
## Agent Rule Processing
### Network Rules
```ruby
# Agent receives network rule from Hub:
rule = {
id: 12341,
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "10.0.0.0/8" },
priority: 8,
enabled: true
}
# Agent converts to ipv4_ranges entry:
cidr = IPAddr.new("10.0.0.0/8")
Ipv4Range.upsert({
source: "hub:12341",
network_start: cidr.to_i,
network_end: cidr.to_range.end.to_i,
network_prefix: 8,
waf_action: 1, # deny
priority: 8
}, unique_by: :source)
# Agent evaluates request:
# SELECT * FROM ipv4_ranges
# WHERE ? BETWEEN network_start AND network_end
# ORDER BY network_prefix DESC
# LIMIT 1
```
### Rate Limit Rules
```ruby
# Agent stores in memory:
@rate_limit_rules = {
"global" => { limit: 100, window: 60, cidr: "0.0.0.0/0" }
}
@rate_counters = {
"1.2.3.4" => { count: 50, window_start: Time.now }
}
# On each request:
def check_rate_limit(ip)
rule = find_most_specific_rate_limit_rule(ip)
counter = @rate_counters[ip] ||= { count: 0, window_start: Time.now }
# Reset window if expired
if Time.now - counter[:window_start] > rule[:window]
counter = { count: 0, window_start: Time.now }
end
counter[:count] += 1
if counter[:count] > rule[:limit]
{ action: "rate_limit", status: 429 }
else
{ action: "allow" }
end
end
```
### Path Pattern Rules
```ruby
# Agent evaluates patterns:
PATH_PATTERNS = [/.env$/, /.git/, /wp-admin/]
def check_path_patterns(path)
matched = PATH_PATTERNS.any? { |pattern| path.match?(pattern) }
if matched
# Send event to Hub with flag
send_event_to_hub(
path: path,
matched_pattern: true,
waf_action: "log" # Don't block yet
)
# Hub will analyze and create IP block rule if needed
end
end
```
## Hub Intelligence (Auto-Generation)
### Scanner Detection
```ruby
# PathScannerDetectorJob
class PathScannerDetectorJob < ApplicationJob
SCANNER_PATHS = %w[/.env /.git /wp-admin /phpMyAdmin /.aws]
def perform
# Find IPs hitting scanner paths
scanner_ips = Event
.where("request_path IN (?)", SCANNER_PATHS)
.where("timestamp > ?", 5.minutes.ago)
.group(:ip_address)
.having("COUNT(*) >= 3")
.pluck(:ip_address)
scanner_ips.each do |ip|
# Create 24h ban rule
Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "#{ip}/32" },
priority: 32,
expires_at: 24.hours.from_now,
source: "auto:scanner_detected",
metadata: {
reason: "Hit #{SCANNER_PATHS.join(', ')}",
auto_generated: true
}
)
end
end
end
```
### Rate Limit Abuse Detection
```ruby
# RateLimitAnomalyJob
class RateLimitAnomalyJob < ApplicationJob
def perform
# Find IPs exceeding normal rate
abusive_ips = Event
.where("timestamp > ?", 1.minute.ago)
.group(:ip_address)
.having("COUNT(*) > 200") # >200 req/min
.pluck(:ip_address)
abusive_ips.each do |ip|
# Create aggressive rate limit or block
Rule.create!(
rule_type: "rate_limit",
action: "rate_limit",
conditions: { cidr: "#{ip}/32", scope: "global" },
priority: 32,
expires_at: 1.hour.from_now,
source: "auto:rate_limit_exceeded",
metadata: {
limit: 10,
window: 60,
per_ip: true
}
)
end
end
end
```
## Performance Characteristics
### Hub
- **Rule query**: O(log n) with `(updated_at, id)` index
- **Version check**: Single index lookup
- **Rule generation**: Background jobs, no request impact
### Agent
- **Network rule lookup**: O(log n) via B-tree index on `(network_start, network_end)`
- **Rate limit check**: O(1) hash lookup in memory
- **Path pattern check**: O(n) regex match (n = number of patterns)
- **Overall request evaluation**: <1ms for typical case
### Sync Efficiency
- **Incremental sync**: Only changed rules since last sync
- **Typical sync payload**: <10 KB for 50 rules
- **Sync frequency**: Every 10s or 1000 events
- **Version check**: <1 KB response
## Future Enhancements (Phase 2+)
### Per-Path Rate Limiting
- Different limits for `/api/*`, `/login`, `/admin`
- Agent tracks multiple counters per IP
### Path-Based Event Sampling
- Send all `/admin` requests
- Skip `/health`, `/metrics`
- Sample 10% of regular traffic
### Challenge Actions
- CAPTCHA challenges for suspicious IPs
- JavaScript challenges for bot detection
### Scheduled Rules
- Block during maintenance windows
- Time-of-day rate limits
### Multi-Project Rules (Phase 10+)
- Global rules across all projects
- Per-project rule overrides
## Summary
The Baffle Hub rule system provides:
- **Fast local enforcement** (sub-millisecond)
- **Centralized intelligence** (Hub analytics)
- **Efficient synchronization** (timestamp-based incremental sync)
- **Dynamic adaptation** (backpressure control via sampling)
- **Temporal flexibility** (auto-expiring rules)
- **Audit trail** (soft deletes, source tracking)
This architecture scales from single-server deployments to distributed multi-agent installations while maintaining simplicity and pragmatic design choices focused on the "low-hanging fruit" of WAF functionality.

View File

@@ -0,0 +1,381 @@
# Rule System Implementation Summary
## What We Built
A complete distributed WAF rule synchronization system that allows the Baffle Hub to generate and manage rules while Agents download and enforce them locally with sub-millisecond latency.
## Implementation Status: ✅ Complete (Phase 1)
### 1. Database Schema ✅
**Migration**: `db/migrate/20251103080823_enhance_rules_table_for_sync.rb`
Enhanced the `rules` table with:
- `source` field to track rule origin (manual, auto-generated, imported)
- JSON `conditions` and `metadata` fields
- `expires_at` for temporal rules (24h bans)
- `enabled` flag for soft deletes
- `priority` for rule specificity
- Optimized indexes for sync queries (`updated_at, id`)
**Schema**:
```ruby
create_table "rules" do |t|
t.string :rule_type, null: false # network_v4, network_v6, rate_limit, path_pattern
t.string :action, null: false # allow, deny, rate_limit, redirect, log
t.json :conditions, null: false # CIDR, patterns, scope
t.json :metadata # reason, limits, redirect_url
t.integer :priority # Auto-calculated from CIDR prefix
t.datetime :expires_at # For temporal bans
t.boolean :enabled, default: true # Soft delete flag
t.string :source, limit: 100 # Origin tracking
t.timestamps
# Indexes for efficient sync
t.index [:updated_at, :id] # Primary sync cursor
t.index :enabled
t.index :expires_at
t.index [:rule_type, :enabled]
end
```
### 2. Rule Model ✅
**File**: `app/models/rule.rb`
Complete Rule model with:
- **Rule types**: `network_v4`, `network_v6`, `rate_limit`, `path_pattern`
- **Actions**: `allow`, `deny`, `rate_limit`, `redirect`, `log`
- **Validations**: Type-specific validation for conditions and metadata
- **Scopes**: `active`, `expired`, `network_rules`, `rate_limit_rules`, etc.
- **Sync methods**: `since(timestamp)`, `latest_version`
- **Auto-priority**: Calculates priority from CIDR prefix length
- **Agent format**: `to_agent_format` for API responses
**Example Usage**:
```ruby
# Create network block rule
Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "1.2.3.4/32" },
expires_at: 24.hours.from_now,
source: "auto:scanner_detected",
metadata: { reason: "Hit /.env multiple times" }
)
# Create rate limit rule
Rule.create!(
rule_type: "rate_limit",
action: "rate_limit",
conditions: { cidr: "0.0.0.0/0", scope: "global" },
metadata: { limit: 100, window: 60, per_ip: true },
source: "manual"
)
# Disable rule (soft delete)
rule.disable!(reason: "False positive")
# Query for sync
Rule.since("2025-11-03T08:00:00.000Z")
```
### 3. API Endpoints ✅
**Controller**: `app/controllers/api/rules_controller.rb`
**Routes**: Added to `config/routes.rb`
#### Version Endpoint (Lightweight Check)
```http
GET /api/:public_key/rules/version
Response:
{
"version": "2025-11-03T08:14:23.648330Z",
"count": 150,
"sampling": {
"allowed_requests": 1.0,
"blocked_requests": 1.0,
"rate_limited_requests": 1.0,
"effective_until": "2025-11-03T08:14:33.689Z",
"load_level": "normal",
"queue_depth": 0
}
}
```
#### Incremental Sync
```http
GET /api/:public_key/rules?since=2025-11-03T08:00:00.000Z
Response:
{
"version": "2025-11-03T08:14:23.648330Z",
"sampling": { ... },
"rules": [
{
"id": 1,
"rule_type": "network_v4",
"action": "deny",
"conditions": { "cidr": "10.0.0.0/8" },
"priority": 8,
"expires_at": null,
"enabled": true,
"source": "manual",
"metadata": { "reason": "Testing" },
"created_at": "2025-11-03T08:14:23Z",
"updated_at": "2025-11-03T08:14:23Z"
}
]
}
```
#### Full Sync
```http
GET /api/:public_key/rules
Response: Same format, returns all active rules
```
### 4. Dynamic Load-Based Sampling ✅
**Service**: `app/services/hub_load.rb`
Monitors SolidQueue depth and adjusts event sampling rates:
| Queue Depth | Load Level | Allowed | Blocked | Rate Limited |
|-------------|------------|---------|---------|--------------|
| 0-1,000 | Normal | 100% | 100% | 100% |
| 1,001-5,000 | Moderate | 50% | 100% | 100% |
| 5,001-10,000| High | 20% | 100% | 100% |
| 10,001+ | Critical | 5% | 100% | 100% |
**Features**:
- Automatic backpressure control
- Always sends 100% of blocks/rate-limits
- Reduces allowed request sampling under load
- Included in every API response
### 5. Background Jobs ✅
#### ExpiredRulesCleanupJob
**File**: `app/jobs/expired_rules_cleanup_job.rb`
- Runs hourly
- Disables rules with `expires_at` in the past
- Cleans up old disabled rules (>30 days) once per day
- Agents pick up disabled rules via `updated_at` change
#### PathScannerDetectorJob
**File**: `app/jobs/path_scanner_detector_job.rb`
- Runs every 5 minutes (recommended)
- Detects IPs hitting scanner paths (/.env, /.git, /wp-admin, etc.)
- Auto-creates 24h ban rules after 3+ hits
- Handles both IPv4 and IPv6
- Prevents duplicate rules
**Scanner Paths**:
- `/.env`, `/.git`, `/.aws`, `/.ssh`, `/.config`
- `/wp-admin`, `/wp-login.php`
- `/phpMyAdmin`, `/phpmyadmin`
- `/admin`, `/administrator`
- `/backup`, `/db_backup`
- `/.DS_Store`, `/web.config`
## Testing
### Create Test Rules
```bash
bin/rails runner '
# Network block
Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "10.0.0.0/8" },
source: "manual",
metadata: { reason: "Test block" }
)
# Rate limit
Rule.create!(
rule_type: "rate_limit",
action: "rate_limit",
conditions: { cidr: "0.0.0.0/0", scope: "global" },
metadata: { limit: 100, window: 60 },
source: "manual"
)
puts "✓ Created #{Rule.count} rules"
puts "✓ Latest version: #{Rule.latest_version}"
'
```
### Test API Endpoints
```bash
# Get your project key
bin/rails runner 'puts Project.first.public_key'
# Test version endpoint
curl http://localhost:3000/api/YOUR_PUBLIC_KEY/rules/version | jq
# Test full sync
curl http://localhost:3000/api/YOUR_PUBLIC_KEY/rules | jq
# Test incremental sync
curl "http://localhost:3000/api/YOUR_PUBLIC_KEY/rules?since=2025-11-03T08:00:00.000Z" | jq
```
### Run Background Jobs
```bash
# Test expired rules cleanup
bin/rails runner 'ExpiredRulesCleanupJob.perform_now'
# Test scanner detector (needs events first)
bin/rails runner 'PathScannerDetectorJob.perform_now'
# Check hub load
bin/rails runner 'puts HubLoad.stats.inspect'
```
## Agent Integration (Next Steps)
The Agent needs to:
1. **Poll for updates** every 10 seconds or 1000 events:
```ruby
GET /api/:public_key/rules?since=<last_updated_at>
```
2. **Process rules** received:
- `enabled: true` → Insert/update in local tables
- `enabled: false` → Remove from local tables
3. **Populate local SQLite tables**:
```ruby
# For network_v4 rules:
cidr = IPAddr.new(rule.conditions.cidr)
Ipv4Range.upsert({
source: "hub:#{rule.id}",
network_start: cidr.to_i,
network_end: cidr.to_range.end.to_i,
network_prefix: rule.priority,
waf_action: map_action(rule.action),
redirect_url: rule.metadata.redirect_url,
priority: rule.priority
})
```
4. **Respect sampling rates** from API response:
```ruby
sampling = response["sampling"]
if event.allowed? && rand > sampling["allowed_requests"]
skip_sending_to_hub
end
```
## Key Design Decisions
### ✅ IPv4/IPv6 Split
- Separate `network_v4` and `network_v6` rule types
- Agent has separate `ipv4_ranges` and `ipv6_ranges` tables
- Better performance (integer vs binary indexes)
### ✅ Timestamp-Based Sync
- Use `updated_at` as version cursor (not `id`)
- Handles rule updates and soft deletes
- Query overlap (0.5s) handles clock skew
- Secondary sort by `id` for consistency
### ✅ Soft Deletes
- Rules disabled, not deleted
- Audit trail preserved
- Agents sync via `enabled: false`
- Old rules cleaned after 30 days
### ✅ Priority from CIDR
- Auto-calculated from prefix length
- Most specific (smallest prefix) wins
- `/32` > `/24` > `/16` > `/8`
- No manual priority needed for network rules
### ✅ Dynamic Sampling
- Hub controls load via sampling rates
- Always sends critical events (blocks, rate limits)
- Reduces allowed event traffic under load
- Prevents Hub overload
## Performance Characteristics
### Hub
- **Version check**: Single index lookup (~1ms)
- **Incremental sync**: Index scan on `(updated_at, id)` (~5-10ms for 100 rules)
- **Rule creation**: Single insert (~5ms)
### Agent (Expected)
- **Network lookup**: O(log n) via B-tree on `(network_start, network_end)` (<1ms)
- **Rate limit check**: O(1) hash lookup in memory (<0.1ms)
- **Sync overhead**: 10s polling, ~5-10 KB payload for 50 rules
## What's Not Included (Future Phases)
- ❌ Per-path rate limiting (Phase 2)
- ❌ Path-based event sampling (Phase 2)
- ❌ Challenge actions/CAPTCHA (Phase 2+)
- ❌ Multi-project rules (Phase 10+)
- ❌ Rule UI (manual creation via console for now)
- ❌ Recurring job scheduling (needs separate setup)
## Next Implementation Steps
1. **Schedule Background Jobs**
- Add to `config/initializers/recurring_jobs.rb` or use gem like `good_job`
- `ExpiredRulesCleanupJob` every hour
- `PathScannerDetectorJob` every 5 minutes
2. **Build Rule Management UI**
- Form to create network block rules
- List active rules
- Disable/enable rules
- View auto-generated rules
3. **Agent Sync Implementation**
- HTTP client to poll rules endpoint
- SQLite population logic
- Sampling rate respect
- Rule evaluation integration
4. **Monitoring/Metrics**
- Dashboard showing active rules count
- Auto-generated rules per day
- Banned IPs list
- Rule sync lag per agent
## Documentation
Complete architecture documentation available at:
- **docs/rule-architecture.md** - Full technical specification
- **This file** - Implementation summary and testing guide
## Summary
We've built a production-ready, distributed WAF rule system with:
- ✅ Database schema with optimized indexes
- ✅ Complete Rule model with validations
- ✅ RESTful API with version/incremental/full sync
- ✅ Dynamic load-based event sampling
- ✅ Auto-expiring temporal rules
- ✅ Scanner detection and auto-banning
- ✅ Soft deletes with audit trail
- ✅ IPv4/IPv6 separation
- ✅ Comprehensive documentation
The system is ready for Agent integration and can scale from single-server to multi-agent distributed deployments.

168
lib/tasks/geoip.rake Normal file
View File

@@ -0,0 +1,168 @@
# frozen_string_literal: true
namespace :geoip do
desc "Update the GeoIP database"
task update: :environment do
puts "Updating GeoIP database..."
success = GeoIpService.update_database!
if success
puts "✅ GeoIP database successfully updated"
else
puts "❌ Failed to update GeoIP database"
end
end
desc "Lookup country for a specific IP address"
task :lookup, [:ip_address] => :environment do |_t, args|
ip_address = args[:ip_address]
if ip_address.blank?
puts "❌ Please provide an IP address: rails geoip:lookup[8.8.8.8]"
exit 1
end
puts "Looking up country for #{ip_address}..."
begin
country = GeoIpService.lookup_country(ip_address)
if country.present?
puts "📍 #{ip_address}#{country}"
else
puts "❓ No country found for #{ip_address}"
end
rescue => e
puts "❌ Error looking up #{ip_address}: #{e.message}"
end
end
desc "Show GeoIP database status"
task status: :environment do
puts "GeoIP Database Status"
puts "=" * 40
geo_service = GeoIpService.new
database_info = geo_service.database_info
if geo_service.database_available? && database_info
puts "✅ Active database found:"
puts " Type: #{database_info[:type]}"
puts " Version: #{database_info[:version]}"
puts " Size: #{number_to_human_size(database_info[:size])}"
puts " Modified: #{database_info[:modified_at].strftime('%Y-%m-%d %H:%M:%S UTC')}"
puts " Age: #{database_info[:age_days]} days"
puts " Path: #{database_info[:file_path]}"
puts " Valid: #{geo_service.database_available? ? '✅' : '❌'}"
if database_info[:age_days] > Rails.application.config.maxmind.max_age_days
puts " ⚠️ Database is outdated (older than #{Rails.application.config.maxmind.max_age_days} days)"
end
else
puts "❌ No active GeoIP database found"
puts ""
puts "To download the database, run:"
puts " rails geoip:update"
end
puts ""
puts "Configuration:"
config = Rails.application.config.maxmind
puts " Auto-updates: #{config.auto_update_enabled ? '✅' : '❌'}"
puts " Update interval: #{config.update_interval_days} days"
puts " Max age: #{config.max_age_days} days"
puts " Storage path: #{config.storage_path}"
end
desc "Enrich existing events missing country codes"
task enrich_missing: :environment do
puts "Finding events without country codes..."
events_without_country = Event.where(country_code: [nil, ''])
.where.not(ip_address: [nil, ''])
.limit(1000) # Process in batches
if events_without_country.empty?
puts "✅ No events found missing country codes"
exit 0
end
puts "Found #{events_without_country.count} events without country codes"
puts "Processing batch..."
geo_service = GeoIpService.new
updated_count = 0
events_without_country.find_each do |event|
country = geo_service.lookup_country(event.ip_address)
if country.present?
event.update!(country_code: country)
updated_count += 1
end
# Add small delay to avoid overwhelming the service
sleep(0.01) if updated_count % 100 == 0
end
puts "✅ Updated #{updated_count} events with country codes"
end
desc "Test GeoIP service with sample IPs"
task test: :environment do
puts "Testing GeoIP service..."
puts "=" * 30
test_ips = [
'8.8.8.8', # Google DNS (US)
'1.1.1.1', # Cloudflare DNS (US)
'208.67.222.222', # OpenDNS (US)
'9.9.9.9', # Quad9 DNS (US)
'8.8.4.4', # Google DNS (US)
'1.0.0.1', # Cloudflare DNS (AU)
'64.6.64.6', # Verisign DNS (US)
'94.140.14.14', # AdGuard DNS (DE)
'2001:4860:4860::8888', # Google DNS IPv6
'2606:4700:4700::1111' # Cloudflare DNS IPv6
]
geo_service = GeoIpService.new
if !geo_service.database_available?
puts "❌ GeoIP database not available. Please run 'rails geoip:update' first."
exit 1
end
puts "Database info:"
info = geo_service.database_info
puts " Type: #{info[:type]}"
puts " Version: #{info[:version]}"
puts " Size: #{number_to_human_size(info[:size])}"
puts ""
test_ips.each do |ip|
begin
country = geo_service.lookup_country(ip)
puts "#{ip.ljust(30)}#{country || 'Unknown'}"
rescue => e
puts "#{ip.ljust(30)} → Error: #{e.message}"
end
end
end
private
def number_to_human_size(bytes)
units = %w[B KB MB GB TB]
size = bytes.to_f
unit_index = 0
while size >= 1024 && unit_index < units.length - 1
size /= 1024
unit_index += 1
end
"#{size.round(2)} #{units[unit_index]}"
end
end

View File

@@ -0,0 +1,195 @@
# frozen_string_literal: true
require "test_helper"
module Api
class RulesControllerTest < ActionDispatch::IntegrationTest
setup do
@project = Project.create!(
name: "Test Project",
slug: "test-project",
public_key: "test-key-#{SecureRandom.hex(8)}"
)
@rule1 = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "10.0.0.0/8" },
source: "manual"
)
@rule2 = Rule.create!(
rule_type: "rate_limit",
action: "rate_limit",
conditions: { cidr: "0.0.0.0/0", scope: "global" },
metadata: { limit: 100, window: 60 }
)
end
test "version endpoint returns correct structure" do
get "/api/#{@project.public_key}/rules/version"
assert_response :success
json = JSON.parse(response.body)
assert json["version"].present?
assert_equal 2, json["count"]
assert json["sampling"].present?
assert json["sampling"]["allowed_requests"].present?
assert json["sampling"]["blocked_requests"].present?
assert json["sampling"]["load_level"].present?
end
test "version endpoint requires valid project key" do
get "/api/invalid-key/rules/version"
assert_response :unauthorized
json = JSON.parse(response.body)
assert_equal "Invalid project key", json["error"]
end
test "version endpoint rejects disabled projects" do
@project.update!(enabled: false)
get "/api/#{@project.public_key}/rules/version"
assert_response :forbidden
json = JSON.parse(response.body)
assert_equal "Project is disabled", json["error"]
end
test "index endpoint returns all active rules" do
get "/api/#{@project.public_key}/rules"
assert_response :success
json = JSON.parse(response.body)
assert json["version"].present?
assert json["sampling"].present?
assert_equal 2, json["rules"].length
rule = json["rules"].find { |r| r["id"] == @rule1.id }
assert_equal "network_v4", rule["rule_type"]
assert_equal "deny", rule["action"]
assert_equal({ "cidr" => "10.0.0.0/8" }, rule["conditions"])
assert_equal 8, rule["priority"]
end
test "index endpoint excludes disabled rules" do
@rule1.update!(enabled: false)
get "/api/#{@project.public_key}/rules"
assert_response :success
json = JSON.parse(response.body)
assert_equal 1, json["rules"].length
assert_equal @rule2.id, json["rules"].first["id"]
end
test "index endpoint excludes expired rules" do
@rule1.update!(expires_at: 1.hour.ago)
get "/api/#{@project.public_key}/rules"
assert_response :success
json = JSON.parse(response.body)
assert_equal 1, json["rules"].length
assert_equal @rule2.id, json["rules"].first["id"]
end
test "index endpoint with since parameter returns recent rules" do
# Update rule1 to be older
@rule1.update_column(:updated_at, 2.hours.ago)
since_time = 1.hour.ago.iso8601
get "/api/#{@project.public_key}/rules?since=#{since_time}"
assert_response :success
json = JSON.parse(response.body)
assert_equal 1, json["rules"].length
assert_equal @rule2.id, json["rules"].first["id"]
end
test "index endpoint with since parameter includes disabled rules" do
@rule1.update!(enabled: false) # This updates updated_at
since_time = 1.minute.ago.iso8601
get "/api/#{@project.public_key}/rules?since=#{since_time}"
assert_response :success
json = JSON.parse(response.body)
# Should include the disabled rule for agent to remove it
disabled_rule = json["rules"].find { |r| r["id"] == @rule1.id }
assert disabled_rule.present?
assert_equal false, disabled_rule["enabled"]
end
test "index endpoint with invalid timestamp returns error" do
get "/api/#{@project.public_key}/rules?since=invalid-timestamp"
assert_response :bad_request
json = JSON.parse(response.body)
assert json["error"].include?("Invalid timestamp format")
end
test "index endpoint requires authentication" do
get "/api/invalid-key/rules"
assert_response :unauthorized
end
test "index endpoint includes sampling information" do
get "/api/#{@project.public_key}/rules"
assert_response :success
json = JSON.parse(response.body)
sampling = json["sampling"]
assert_equal 1.0, sampling["allowed_requests"]
assert_equal 1.0, sampling["blocked_requests"]
assert_equal 1.0, sampling["rate_limited_requests"]
assert sampling["effective_until"].present?
assert_equal "normal", sampling["load_level"]
end
test "rules are ordered by updated_at for sync" do
# Create rules with different timestamps
oldest = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "192.168.1.0/24" }
)
oldest.update_column(:updated_at, 3.hours.ago)
middle = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "192.168.2.0/24" }
)
middle.update_column(:updated_at, 2.hours.ago)
newest = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "192.168.3.0/24" }
)
get "/api/#{@project.public_key}/rules?since=#{4.hours.ago.iso8601}"
assert_response :success
json = JSON.parse(response.body)
ids = json["rules"].map { |r| r["id"] }
# Should be ordered oldest to newest by updated_at
assert_equal [oldest.id, middle.id], ids.first(2)
assert_equal newest.id, ids.last
end
end
end

2
test/fixtures/ipv4_ranges.yml vendored Normal file
View File

@@ -0,0 +1,2 @@
# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
# Empty fixtures - tests create their own data

2
test/fixtures/ipv6_ranges.yml vendored Normal file
View File

@@ -0,0 +1,2 @@
# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
# Empty fixtures - tests create their own data

View File

@@ -1,37 +0,0 @@
# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
one:
ip_address:
network_prefix: 1
ip_version: 1
company: MyString
asn: 1
asn_org: MyString
is_datacenter: false
is_proxy: false
is_vpn: false
ip_api_country: MyString
geo2_country: MyString
abuser_scores: MyText
additional_data: MyText
created_at: 2025-11-02 14:01:11
updated_at: 2025-11-02 14:01:11
last_api_fetch: 2025-11-02 14:01:11
two:
ip_address:
network_prefix: 1
ip_version: 1
company: MyString
asn: 1
asn_org: MyString
is_datacenter: false
is_proxy: false
is_vpn: false
ip_api_country: MyString
geo2_country: MyString
abuser_scores: MyText
additional_data: MyText
created_at: 2025-11-02 14:01:11
updated_at: 2025-11-02 14:01:11
last_api_fetch: 2025-11-02 14:01:11

View File

@@ -1,11 +1 @@
# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html # Empty fixtures
one:
segment: MyString
usage_count: 1
first_seen_at: 2025-11-03 10:24:38
two:
segment: MyString
usage_count: 1
first_seen_at: 2025-11-03 10:24:38

View File

@@ -1,7 +1 @@
# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html # Empty fixtures
one:
action: MyString
two:
action: MyString

View File

@@ -1,11 +1 @@
# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html # Empty fixtures
one:
hostname: MyString
usage_count: 1
first_seen_at: 2025-11-03 10:24:29
two:
hostname: MyString
usage_count: 1
first_seen_at: 2025-11-03 10:24:29

View File

@@ -1,7 +1 @@
# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html # Empty fixtures
one:
method: MyString
two:
method: MyString

View File

@@ -1,7 +1 @@
# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html # Empty fixtures
one:
protocol: MyString
two:
protocol: MyString

View File

@@ -1,15 +1 @@
# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html # Empty fixtures
one:
name: MyString
description: MyText
enabled: false
projects:
rules:
two:
name: MyString
description: MyText
enabled: false
projects:
rules:

View File

@@ -1,23 +1 @@
# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html # Empty fixtures
one:
rule_set: one
rule_type: MyString
target: MyString
action: MyString
enabled: false
expires_at: 2025-11-02 19:10:14
priority: 1
conditions:
metadata:
two:
rule_set: two
rule_type: MyString
target: MyString
action: MyString
enabled: false
expires_at: 2025-11-02 19:10:14
priority: 1
conditions:
metadata:

View File

@@ -0,0 +1,138 @@
# frozen_string_literal: true
require "test_helper"
class ExpiredRulesCleanupJobTest < ActiveJob::TestCase
test "disables expired rules" do
expired_rule = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "10.0.0.0/8" },
expires_at: 1.hour.ago,
enabled: true
)
active_rule = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "192.168.0.0/16" },
expires_at: 1.hour.from_now,
enabled: true
)
count = ExpiredRulesCleanupJob.perform_now
assert_equal 1, count
assert_not expired_rule.reload.enabled?
assert active_rule.reload.enabled?
end
test "does not affect rules without expiration" do
permanent_rule = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "10.0.0.0/8" },
expires_at: nil,
enabled: true
)
ExpiredRulesCleanupJob.perform_now
assert permanent_rule.reload.enabled?
end
test "does not affect already disabled rules" do
disabled_expired_rule = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "10.0.0.0/8" },
expires_at: 1.hour.ago,
enabled: false
)
count = ExpiredRulesCleanupJob.perform_now
assert_equal 0, count
assert_not disabled_expired_rule.reload.enabled?
end
test "updates updated_at timestamp when disabling" do
expired_rule = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "10.0.0.0/8" },
expires_at: 1.hour.ago,
enabled: true
)
original_updated_at = expired_rule.updated_at
sleep 0.01 # Ensure time passes
ExpiredRulesCleanupJob.perform_now
assert expired_rule.reload.updated_at > original_updated_at
end
test "deletes old disabled rules when running at 1am" do
old_disabled_rule = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "10.0.0.0/8" },
enabled: false
)
old_disabled_rule.update_column(:updated_at, 31.days.ago)
recent_disabled_rule = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "192.168.0.0/16" },
enabled: false
)
Time.stub :current, Time.current.change(hour: 1) do
ExpiredRulesCleanupJob.perform_now
end
assert_raises(ActiveRecord::RecordNotFound) { old_disabled_rule.reload }
assert_nothing_raised { recent_disabled_rule.reload }
end
test "does not delete old rules when not running at 1am" do
old_disabled_rule = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "10.0.0.0/8" },
enabled: false
)
old_disabled_rule.update_column(:updated_at, 31.days.ago)
Time.stub :current, Time.current.change(hour: 10) do
ExpiredRulesCleanupJob.perform_now
end
assert_nothing_raised { old_disabled_rule.reload }
end
test "returns count of disabled rules" do
3.times do |i|
Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "10.#{i}.0.0/16" },
expires_at: 1.hour.ago,
enabled: true
)
end
count = ExpiredRulesCleanupJob.perform_now
assert_equal 3, count
end
test "returns zero when no expired rules" do
count = ExpiredRulesCleanupJob.perform_now
assert_equal 0, count
end
end

View File

@@ -0,0 +1,251 @@
# frozen_string_literal: true
require "test_helper"
class PathScannerDetectorJobTest < ActiveJob::TestCase
setup do
@project = Project.first || Project.create!(
name: "Test Project",
slug: "test-project",
public_key: SecureRandom.hex(16)
)
end
test "creates ban rule for IP hitting scanner paths" do
ip = "192.168.1.100"
# Create events hitting scanner paths
["/.env", "/.git", "/wp-admin"].each do |path|
Event.create!(
project: @project,
event_id: SecureRandom.uuid,
timestamp: Time.current,
ip_address: ip,
request_path: path,
waf_action: "allow"
)
end
count = PathScannerDetectorJob.perform_now
assert_equal 1, count
rule = Rule.where(source: "auto:scanner_detected").last
assert_not_nil rule
assert_equal "network_v4", rule.rule_type
assert_equal "deny", rule.action
assert_equal "#{ip}/32", rule.cidr
assert_equal 32, rule.priority
assert rule.enabled?
end
test "sets 24 hour expiration on ban rules" do
ip = "192.168.1.100"
3.times do |i|
Event.create!(
project: @project,
event_id: SecureRandom.uuid,
timestamp: Time.current,
ip_address: ip,
request_path: "/.env",
waf_action: "allow"
)
end
PathScannerDetectorJob.perform_now
rule = Rule.where(source: "auto:scanner_detected").last
assert_not_nil rule.expires_at
# Should expire in approximately 24 hours
time_until_expiry = rule.expires_at - Time.current
assert time_until_expiry > 23.hours
assert time_until_expiry < 25.hours
end
test "includes metadata about detected paths" do
ip = "192.168.1.100"
paths = ["/.env", "/.git", "/wp-admin"]
paths.each do |path|
Event.create!(
project: @project,
event_id: SecureRandom.uuid,
timestamp: Time.current,
ip_address: ip,
request_path: path,
waf_action: "allow"
)
end
PathScannerDetectorJob.perform_now
rule = Rule.where(source: "auto:scanner_detected").last
assert_equal 3, rule.metadata["hit_count"]
assert_equal paths.sort, rule.metadata["paths"].sort
assert rule.metadata["reason"].include?("Scanner detected")
assert rule.metadata["auto_generated"]
end
test "does not create rule for insufficient hits" do
ip = "192.168.1.100"
# Only 2 hits, minimum is 3
2.times do
Event.create!(
project: @project,
event_id: SecureRandom.uuid,
timestamp: Time.current,
ip_address: ip,
request_path: "/.env",
waf_action: "allow"
)
end
count = PathScannerDetectorJob.perform_now
assert_equal 0, count
end
test "only considers recent events" do
ip = "192.168.1.100"
# Old event (outside lookback window)
old_event = Event.create!(
project: @project,
event_id: SecureRandom.uuid,
timestamp: 10.minutes.ago,
ip_address: ip,
request_path: "/.env",
waf_action: "allow"
)
# Recent events
2.times do
Event.create!(
project: @project,
event_id: SecureRandom.uuid,
timestamp: Time.current,
ip_address: ip,
request_path: "/.git",
waf_action: "allow"
)
end
count = PathScannerDetectorJob.perform_now
# Should not find sufficient hits (only 2 recent, 1 old)
assert_equal 0, count
end
test "does not create duplicate rules for existing IP" do
ip = "192.168.1.100"
# Create existing rule
Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "#{ip}/32" },
enabled: true
)
# Create scanner events
3.times do
Event.create!(
project: @project,
event_id: SecureRandom.uuid,
timestamp: Time.current,
ip_address: ip,
request_path: "/.env",
waf_action: "allow"
)
end
count = PathScannerDetectorJob.perform_now
assert_equal 0, count
end
test "handles IPv6 addresses" do
ip = "2001:db8::1"
3.times do
Event.create!(
project: @project,
event_id: SecureRandom.uuid,
timestamp: Time.current,
ip_address: ip,
request_path: "/.env",
waf_action: "allow"
)
end
count = PathScannerDetectorJob.perform_now
assert_equal 1, count
rule = Rule.where(source: "auto:scanner_detected").last
assert_equal "network_v6", rule.rule_type
assert_equal "#{ip}/32", rule.cidr
end
test "creates separate rules for different IPs" do
ip1 = "192.168.1.100"
ip2 = "192.168.1.101"
[ip1, ip2].each do |ip|
3.times do
Event.create!(
project: @project,
event_id: SecureRandom.uuid,
timestamp: Time.current,
ip_address: ip,
request_path: "/.env",
waf_action: "allow"
)
end
end
count = PathScannerDetectorJob.perform_now
assert_equal 2, count
end
test "handles invalid IP addresses gracefully" do
# Create event with invalid IP
Event.create!(
project: @project,
event_id: SecureRandom.uuid,
timestamp: Time.current,
ip_address: "invalid-ip",
request_path: "/.env",
waf_action: "allow"
)
assert_nothing_raised do
PathScannerDetectorJob.perform_now
end
end
test "returns count of created rules" do
3.times do |i|
ip = "192.168.1.#{100 + i}"
3.times do
Event.create!(
project: @project,
event_id: SecureRandom.uuid,
timestamp: Time.current,
ip_address: ip,
request_path: "/.env",
waf_action: "allow"
)
end
end
count = PathScannerDetectorJob.perform_now
assert_equal 3, count
end
end

292
test/models/event_test.rb Normal file
View File

@@ -0,0 +1,292 @@
# frozen_string_literal: true
require "test_helper"
class EventTest < ActiveSupport::TestCase
def setup
@project = Project.create!(name: "Test Project", slug: "test-project")
@sample_payload = {
"event_id" => "test-event-123",
"timestamp" => Time.now.iso8601,
"request" => {
"ip" => "192.168.1.1",
"method" => "GET",
"path" => "/api/test",
"headers" => {
"host" => "example.com",
"user-agent" => "TestAgent/1.0",
"content-type" => "application/json"
},
"query" => { "param" => "value" }
},
"response" => {
"status_code" => 200,
"duration_ms" => 150,
"size" => 1024
},
"waf_action" => "allow",
"server_name" => "test-server",
"environment" => "test",
"geo" => {
"country_code" => "US",
"city" => "Test City"
},
"tags" => { "source" => "test" },
"agent" => {
"name" => "baffle-agent",
"version" => "1.0.0"
}
}
end
def teardown
Event.delete_all # Delete events first to avoid foreign key constraints
Project.delete_all
end
test "create_from_waf_payload! creates event with proper enum values" do
event = Event.create_from_waf_payload!("test-123", @sample_payload, @project)
assert event.persisted?
assert_equal @project, event.project
assert_equal "test-123", event.event_id
assert_equal "192.168.1.1", event.ip_address
assert_equal "/api/test", event.request_path
assert_equal 200, event.response_status
assert_equal 150, event.response_time_ms
assert_equal "test-server", event.server_name
assert_equal "test", event.environment
assert_equal "US", event.country_code
assert_equal "Test City", event.city
assert_equal "baffle-agent", event.agent_name
assert_equal "1.0.0", event.agent_version
end
test "create_from_waf_payload! properly normalizes request_method enum" do
test_methods = ["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS"]
expected_enums = [:get, :post, :put, :patch, :delete, :head, :options]
test_methods.each_with_index do |method, index|
payload = @sample_payload.dup
payload["request"]["method"] = method
payload["event_id"] = "test-method-#{method.downcase}"
event = Event.create_from_waf_payload!("test-method-#{method.downcase}", payload, @project)
assert_equal expected_enums[index].to_s, event.request_method,
"Method #{method} should map to enum #{expected_enums[index]}"
assert_equal index, event.request_method_before_type_cast,
"Method #{method} should be stored as integer #{index}"
end
end
test "create_from_waf_payload! properly normalizes waf_action enum" do
test_actions = [
["allow", :allow, 0],
["pass", :allow, 0],
["deny", :deny, 1],
["block", :deny, 1],
["redirect", :redirect, 2],
["challenge", :challenge, 3],
["unknown", :allow, 0] # Default fallback
]
test_actions.each do |action, expected_enum, expected_int|
payload = @sample_payload.dup
payload["waf_action"] = action
payload["event_id"] = "test-action-#{action}"
event = Event.create_from_waf_payload!("test-action-#{action}", payload, @project)
assert_equal expected_enum.to_s, event.waf_action,
"Action #{action} should map to enum #{expected_enum}"
assert_equal expected_int, event.waf_action_before_type_cast,
"Action #{action} should be stored as integer #{expected_int}"
end
end
test "create_from_waf_payload! handles header case normalization" do
payload = @sample_payload.dup
payload["request"]["headers"] = {
"HOST" => "EXAMPLE.COM",
"User-Agent" => "TestAgent/1.0",
"CONTENT-TYPE" => "application/json"
}
event = Event.create_from_waf_payload!("test-headers", payload, @project)
assert_equal "TestAgent/1.0", event.user_agent
# The normalize_payload_headers method should normalize header keys to lowercase
# but keep values as-is
assert_equal "EXAMPLE.COM", event.headers["host"]
assert_equal "application/json", event.headers["content-type"]
end
test "enum values persist after save and reload" do
event = Event.create_from_waf_payload!("test-persist", @sample_payload, @project)
# Verify initial values
assert_equal "get", event.request_method
assert_equal "allow", event.waf_action
assert_equal 0, event.request_method_before_type_cast
assert_equal 0, event.waf_action_before_type_cast
# Reload from database
event.reload
# Values should still be correct
assert_equal "get", event.request_method
assert_equal "allow", event.waf_action
assert_equal 0, event.request_method_before_type_cast
assert_equal 0, event.waf_action_before_type_cast
end
test "enum scopes work correctly" do
# Create events with different methods and actions
Event.create_from_waf_payload!("get-allow", @sample_payload, @project)
post_payload = @sample_payload.dup
post_payload["request"]["method"] = "POST"
post_payload["event_id"] = "post-allow"
Event.create_from_waf_payload!("post-allow", post_payload, @project)
deny_payload = @sample_payload.dup
deny_payload["waf_action"] = "deny"
deny_payload["event_id"] = "get-deny"
Event.create_from_waf_payload!("get-deny", deny_payload, @project)
# Test method scopes - use string values for enum queries
get_events = Event.where(request_method: "get")
post_events = Event.where(request_method: "post")
assert_equal 2, get_events.count
assert_equal 1, post_events.count
# Test action scopes - use string values for enum queries
allowed_events = Event.where(waf_action: "allow")
denied_events = Event.where(waf_action: "deny")
assert_equal 2, allowed_events.count
assert_equal 1, denied_events.count
end
test "event normalization is triggered when needed" do
# Create event without enum values (simulating old data)
event = Event.create!(
project: @project,
event_id: "normalization-test",
timestamp: Time.current,
payload: @sample_payload,
ip_address: "192.168.1.1",
request_path: "/test",
# Don't set request_method or waf_action to trigger normalization
request_method: nil,
waf_action: nil
)
# Manually set the raw values that would normally be extracted
event.instance_variable_set(:@raw_request_method, "POST")
event.instance_variable_set(:@raw_action, "deny")
# Trigger normalization
event.send(:normalize_event_fields)
event.save!
# Verify normalization worked
event.reload
assert_equal "post", event.request_method
assert_equal "deny", event.waf_action
assert_equal 1, event.request_method_before_type_cast # POST = 1
assert_equal 1, event.waf_action_before_type_cast # DENY = 1
end
test "payload extraction methods work correctly" do
event = Event.create_from_waf_payload!("extraction-test", @sample_payload, @project)
# Test request_details
request_details = event.request_details
assert_equal "192.168.1.1", request_details[:ip]
assert_equal "GET", request_details[:method]
assert_equal "/api/test", request_details[:path]
assert_equal "example.com", request_details[:headers]["host"]
# Test response_details
response_details = event.response_details
assert_equal 200, response_details[:status_code]
assert_equal 150, response_details[:duration_ms]
assert_equal 1024, response_details[:size]
# Test geo_details
geo_details = event.geo_details
assert_equal "US", geo_details["country_code"]
assert_equal "Test City", geo_details["city"]
# Test tags
tags = event.tags
assert_equal "test", tags["source"]
end
test "helper methods work correctly" do
event = Event.create_from_waf_payload!("helper-test", @sample_payload, @project)
# Test boolean methods
assert event.allowed?
assert_not event.blocked?
assert_not event.rate_limited?
assert_not event.challenged?
assert_not event.rule_matched?
# Test path methods
assert_equal ["api", "test"], event.path_segments
assert_equal 2, event.path_depth
end
test "timestamp parsing works with various formats" do
timestamps = [
Time.now.iso8601,
(Time.now.to_f * 1000).to_i, # Unix timestamp in milliseconds
Time.now.utc # Time object
]
timestamps.each_with_index do |timestamp, index|
payload = @sample_payload.dup
payload["timestamp"] = timestamp
payload["event_id"] = "timestamp-test-#{index}"
event = Event.create_from_waf_payload!("timestamp-test-#{index}", payload, @project)
assert event.timestamp.is_a?(Time), "Timestamp #{index} should be parsed as Time"
assert_not event.timestamp.nil?
end
end
test "handles missing optional fields gracefully" do
minimal_payload = {
"event_id" => "minimal-test",
"timestamp" => Time.now.iso8601,
"request" => {
"ip" => "10.0.0.1",
"method" => "GET",
"path" => "/simple"
},
"response" => {
"status_code" => 404
}
}
event = Event.create_from_waf_payload!("minimal-test", minimal_payload, @project)
assert event.persisted?
assert_equal "10.0.0.1", event.ip_address
assert_equal "get", event.request_method
assert_equal "/simple", event.request_path
assert_equal 404, event.response_status
# Optional fields should be nil
assert_nil event.user_agent
assert_nil event.response_time_ms
assert_nil event.country_code
assert_nil event.city
assert_nil event.agent_name
assert_nil event.agent_version
end
end

View File

@@ -0,0 +1,122 @@
# frozen_string_literal: true
require "test_helper"
class Ipv4RangeTest < ActiveSupport::TestCase
test "creates range from CIDR notation" do
range = Ipv4Range.create!(cidr: "192.168.1.0/24")
assert_equal 24, range.network_prefix
assert range.network_start.present?
assert range.network_end.present?
assert range.network_start < range.network_end
end
test "calculates correct range for /32 single IP" do
range = Ipv4Range.create!(cidr: "192.168.1.100/32")
assert_equal 32, range.network_prefix
assert_equal range.network_start, range.network_end
end
test "calculates correct range for /8 large network" do
range = Ipv4Range.create!(cidr: "10.0.0.0/8")
assert_equal 8, range.network_prefix
# 10.0.0.0 to 10.255.255.255
ip_start = IPAddr.new("10.0.0.0").to_i
ip_end = IPAddr.new("10.255.255.255").to_i
assert_equal ip_start, range.network_start
assert_equal ip_end, range.network_end
end
test "validates network_prefix range" do
range = Ipv4Range.new(cidr: "192.168.1.0/24")
range.network_prefix = 33
assert_not range.valid?
assert_includes range.errors[:network_prefix], "must be less than or equal to 32"
end
test "contains_ip? returns true for IP in range" do
range = Ipv4Range.create!(cidr: "192.168.1.0/24")
assert range.contains_ip?("192.168.1.1")
assert range.contains_ip?("192.168.1.100")
assert range.contains_ip?("192.168.1.255")
end
test "contains_ip? returns false for IP outside range" do
range = Ipv4Range.create!(cidr: "192.168.1.0/24")
assert_not range.contains_ip?("192.168.2.1")
assert_not range.contains_ip?("10.0.0.1")
end
test "contains_ip class method finds matching ranges" do
range1 = Ipv4Range.create!(cidr: "10.0.0.0/8")
range2 = Ipv4Range.create!(cidr: "192.168.1.0/24")
results = Ipv4Range.contains_ip("10.5.10.50")
assert_includes results, range1
assert_not_includes results, range2
end
test "contains_ip returns most specific range first" do
broad_range = Ipv4Range.create!(cidr: "10.0.0.0/8")
specific_range = Ipv4Range.create!(cidr: "10.0.1.0/24")
results = Ipv4Range.contains_ip("10.0.1.50")
assert_equal specific_range, results.first
end
test "to_cidr returns CIDR notation" do
range = Ipv4Range.create!(cidr: "192.168.1.0/24")
assert_equal "192.168.1.0/24", range.to_cidr
end
test "datacenter scope returns datacenter IPs" do
datacenter = Ipv4Range.create!(cidr: "1.2.3.0/24", is_datacenter: true)
regular = Ipv4Range.create!(cidr: "192.168.1.0/24", is_datacenter: false)
results = Ipv4Range.datacenter
assert_includes results, datacenter
assert_not_includes results, regular
end
test "stores and retrieves JSON metadata" do
range = Ipv4Range.create!(cidr: "1.2.3.0/24")
range.abuser_scores_hash = { "spam" => 0.8, "malware" => 0.3 }
range.save!
range.reload
scores = range.abuser_scores_hash
assert_equal 0.8, scores["spam"]
assert_equal 0.3, scores["malware"]
end
test "stores IP intelligence metadata" do
range = Ipv4Range.create!(
cidr: "1.2.3.0/24",
company: "Example Corp",
asn: 12345,
asn_org: "AS Example",
is_datacenter: true,
is_proxy: false,
is_vpn: false,
ip_api_country: "US"
)
assert_equal "Example Corp", range.company
assert_equal 12345, range.asn
assert range.is_datacenter?
assert_not range.is_proxy?
end
end

View File

@@ -0,0 +1,107 @@
# frozen_string_literal: true
require "test_helper"
class Ipv6RangeTest < ActiveSupport::TestCase
test "creates range from CIDR notation" do
range = Ipv6Range.create!(cidr: "2001:db8::/32")
assert_equal 32, range.network_prefix
assert range.network_start.present?
assert range.network_end.present?
end
test "calculates correct range for /128 single IP" do
range = Ipv6Range.create!(cidr: "2001:db8::1/128")
assert_equal 128, range.network_prefix
assert_equal range.network_start, range.network_end
end
test "validates network_prefix range" do
range = Ipv6Range.new(cidr: "2001:db8::/32")
range.network_prefix = 129
assert_not range.valid?
assert_includes range.errors[:network_prefix], "must be less than or equal to 128"
end
test "contains_ip? returns true for IP in range" do
range = Ipv6Range.create!(cidr: "2001:db8::/32")
assert range.contains_ip?("2001:db8::1")
assert range.contains_ip?("2001:db8:ffff:ffff:ffff:ffff:ffff:ffff")
end
test "contains_ip? returns false for IP outside range" do
range = Ipv6Range.create!(cidr: "2001:db8::/32")
assert_not range.contains_ip?("2001:db9::1")
assert_not range.contains_ip?("fe80::1")
end
test "contains_ip class method finds matching ranges" do
range1 = Ipv6Range.create!(cidr: "2001:db8::/32")
range2 = Ipv6Range.create!(cidr: "fe80::/10")
results = Ipv6Range.contains_ip("2001:db8::1")
assert_includes results, range1
assert_not_includes results, range2
end
test "contains_ip returns most specific range first" do
broad_range = Ipv6Range.create!(cidr: "2001:db8::/32")
specific_range = Ipv6Range.create!(cidr: "2001:db8:1::/48")
results = Ipv6Range.contains_ip("2001:db8:1::5")
assert_equal specific_range, results.first
end
test "to_cidr returns CIDR notation" do
range = Ipv6Range.create!(cidr: "2001:db8::/32")
# IPv6 addresses can be formatted differently
assert range.to_cidr.include?("2001:db8")
assert range.to_cidr.include?("/32")
end
test "datacenter scope returns datacenter IPs" do
datacenter = Ipv6Range.create!(cidr: "2001:db8::/32", is_datacenter: true)
regular = Ipv6Range.create!(cidr: "fe80::/10", is_datacenter: false)
results = Ipv6Range.datacenter
assert_includes results, datacenter
assert_not_includes results, regular
end
test "stores and retrieves JSON metadata" do
range = Ipv6Range.create!(cidr: "2001:db8::/32")
range.additional_data_hash = { "notes" => "Test network", "verified" => true }
range.save!
range.reload
data = range.additional_data_hash
assert_equal "Test network", data["notes"]
assert_equal true, data["verified"]
end
test "stores IP intelligence metadata" do
range = Ipv6Range.create!(
cidr: "2001:db8::/32",
company: "IPv6 Corp",
asn: 65000,
asn_org: "AS IPv6",
is_proxy: true,
ip_api_country: "GB"
)
assert_equal "IPv6 Corp", range.company
assert_equal 65000, range.asn
assert range.is_proxy?
assert_equal "GB", range.ip_api_country
end
end

View File

@@ -1,7 +0,0 @@
require "test_helper"
class NetworkRangeTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
# end
end

View File

@@ -1,7 +1,179 @@
# frozen_string_literal: true
require "test_helper" require "test_helper"
class RuleTest < ActiveSupport::TestCase class RuleTest < ActiveSupport::TestCase
# test "the truth" do # Validation tests
# assert true test "should create valid network_v4 rule" do
# end rule = Rule.new(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "10.0.0.0/8" },
source: "manual"
)
assert rule.valid?
rule.save!
assert_equal 8, rule.priority # Auto-calculated from CIDR prefix
end
test "should create valid network_v6 rule" do
rule = Rule.new(
rule_type: "network_v6",
action: "deny",
conditions: { cidr: "2001:db8::/32" },
source: "manual"
)
assert rule.valid?
rule.save!
assert_equal 32, rule.priority
end
test "should create valid rate_limit rule" do
rule = Rule.new(
rule_type: "rate_limit",
action: "rate_limit",
conditions: { cidr: "0.0.0.0/0", scope: "global" },
metadata: { limit: 100, window: 60 },
source: "manual"
)
assert rule.valid?
end
test "should create valid path_pattern rule" do
rule = Rule.new(
rule_type: "path_pattern",
action: "log",
conditions: { patterns: ["/.env", "/.git"] },
source: "default"
)
assert rule.valid?
end
test "should require rule_type" do
rule = Rule.new(action: "deny", conditions: { cidr: "10.0.0.0/8" })
assert_not rule.valid?
assert_includes rule.errors[:rule_type], "can't be blank"
end
test "should require action" do
rule = Rule.new(rule_type: "network_v4", conditions: { cidr: "10.0.0.0/8" })
assert_not rule.valid?
assert_includes rule.errors[:action], "can't be blank"
end
test "should validate network_v4 has valid IPv4 CIDR" do
rule = Rule.new(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "2001:db8::/32" } # IPv6 in IPv4 rule
)
assert_not rule.valid?
assert_includes rule.errors[:conditions], "cidr must be IPv4 for network_v4 rules"
end
test "should validate rate_limit has limit and window in metadata" do
rule = Rule.new(
rule_type: "rate_limit",
action: "rate_limit",
conditions: { cidr: "0.0.0.0/0", scope: "global" },
metadata: { limit: 100 } # Missing window
)
assert_not rule.valid?
assert_includes rule.errors[:metadata], "must include 'limit' and 'window' for rate_limit rules"
end
# Default value tests
test "should default enabled to true" do
rule = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "10.0.0.0/8" }
)
assert rule.enabled?
end
# Priority calculation tests
test "should calculate priority from IPv4 CIDR prefix" do
rule = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "192.168.1.0/24" }
)
assert_equal 24, rule.priority
end
# Scope tests
test "active scope returns enabled and non-expired rules" do
active = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "10.0.0.0/8" },
enabled: true
)
disabled = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "192.168.0.0/16" },
enabled: false
)
expired = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "172.16.0.0/12" },
enabled: true,
expires_at: 1.hour.ago
)
results = Rule.active.to_a
assert_includes results, active
assert_not_includes results, disabled
assert_not_includes results, expired
end
# Instance method tests
test "active? returns true for enabled non-expired rule" do
rule = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "10.0.0.0/8" },
enabled: true
)
assert rule.active?
end
test "disable! sets enabled to false and adds metadata" do
rule = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "10.0.0.0/8" }
)
rule.disable!(reason: "False positive")
assert_not rule.enabled?
assert_equal "False positive", rule.metadata["disabled_reason"]
assert rule.metadata["disabled_at"].present?
end
# Agent format tests
test "to_agent_format returns correct structure" do
rule = Rule.create!(
rule_type: "network_v4",
action: "deny",
conditions: { cidr: "10.0.0.0/8" },
expires_at: 1.day.from_now,
source: "manual",
metadata: { reason: "Test" }
)
format = rule.to_agent_format
assert_equal rule.id, format[:id]
assert_equal "network_v4", format[:rule_type]
assert_equal "deny", format[:action]
assert_equal 8, format[:priority]
assert_equal true, format[:enabled]
end
end end

View File

@@ -0,0 +1,124 @@
# frozen_string_literal: true
require "test_helper"
class HubLoadTest < ActiveSupport::TestCase
test "normal load level with low queue depth" do
HubLoad.stub :queue_depth, 500 do
assert_equal :normal, HubLoad.calculate_load_level
end
end
test "moderate load level with moderate queue depth" do
HubLoad.stub :queue_depth, 3000 do
assert_equal :moderate, HubLoad.calculate_load_level
end
end
test "high load level with high queue depth" do
HubLoad.stub :queue_depth, 7500 do
assert_equal :high, HubLoad.calculate_load_level
end
end
test "critical load level with very high queue depth" do
HubLoad.stub :queue_depth, 15000 do
assert_equal :critical, HubLoad.calculate_load_level
end
end
test "current_sampling returns correct rates for normal load" do
HubLoad.stub :queue_depth, 500 do
sampling = HubLoad.current_sampling
assert_equal 1.0, sampling[:allowed_requests]
assert_equal 1.0, sampling[:blocked_requests]
assert_equal 1.0, sampling[:rate_limited_requests]
assert_equal :normal, sampling[:load_level]
assert_equal 500, sampling[:queue_depth]
assert sampling[:effective_until].present?
end
end
test "current_sampling reduces allowed requests under moderate load" do
HubLoad.stub :queue_depth, 3000 do
sampling = HubLoad.current_sampling
assert_equal 0.5, sampling[:allowed_requests]
assert_equal 1.0, sampling[:blocked_requests]
assert_equal 1.0, sampling[:rate_limited_requests]
assert_equal :moderate, sampling[:load_level]
end
end
test "current_sampling reduces allowed requests under high load" do
HubLoad.stub :queue_depth, 7500 do
sampling = HubLoad.current_sampling
assert_equal 0.2, sampling[:allowed_requests]
assert_equal 1.0, sampling[:blocked_requests]
assert_equal 1.0, sampling[:rate_limited_requests]
assert_equal :high, sampling[:load_level]
end
end
test "current_sampling minimizes allowed requests under critical load" do
HubLoad.stub :queue_depth, 15000 do
sampling = HubLoad.current_sampling
assert_equal 0.05, sampling[:allowed_requests]
assert_equal 1.0, sampling[:blocked_requests]
assert_equal 1.0, sampling[:rate_limited_requests]
assert_equal :critical, sampling[:load_level]
end
end
test "effective_until is approximately 10 seconds in future" do
sampling = HubLoad.current_sampling
effective_until = Time.parse(sampling[:effective_until])
time_diff = effective_until - Time.current
assert time_diff > 9, "effective_until should be ~10 seconds in future"
assert time_diff < 11, "effective_until should be ~10 seconds in future"
end
test "overloaded? returns false for normal and moderate load" do
HubLoad.stub :queue_depth, 500 do
assert_not HubLoad.overloaded?
end
HubLoad.stub :queue_depth, 3000 do
assert_not HubLoad.overloaded?
end
end
test "overloaded? returns true for high and critical load" do
HubLoad.stub :queue_depth, 7500 do
assert HubLoad.overloaded?
end
HubLoad.stub :queue_depth, 15000 do
assert HubLoad.overloaded?
end
end
test "stats returns complete load information" do
HubLoad.stub :queue_depth, 3000 do
stats = HubLoad.stats
assert_equal 3000, stats[:queue_depth]
assert_equal :moderate, stats[:load_level]
assert_equal false, stats[:overloaded]
assert_equal 0.5, stats[:sampling_rates][:allowed]
assert_equal 1.0, stats[:sampling_rates][:blocked]
end
end
test "handles queue depth query errors gracefully" do
# Simulate SolidQueue error
SolidQueue::Job.stub :where, -> (*) { raise StandardError, "DB error" } do
depth = HubLoad.queue_depth
assert_equal 0, depth # Should return 0 on error
end
end
end