diff --git a/Gemfile b/Gemfile index 7329c15..727920a 100644 --- a/Gemfile +++ b/Gemfile @@ -45,6 +45,12 @@ gem "image_processing", "~> 1.2" # Pagination gem "pagy" +# MaxMind GeoIP database reader +gem "maxmind-db" + +# HTTP client for database downloads +gem "httparty" + group :development, :test do # See https://guides.rubyonrails.org/debugging_rails_applications.html#debugging-with-the-debug-gem gem "debug", platforms: %i[ mri windows ], require: "debug/prelude" diff --git a/Gemfile.lock b/Gemfile.lock index 154b512..3046f34 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -102,6 +102,7 @@ GEM concurrent-ruby (1.3.5) connection_pool (2.5.4) crass (1.0.6) + csv (3.3.5) date (3.5.0) debug (1.11.0) irb (~> 1.10) @@ -125,6 +126,10 @@ GEM raabro (~> 1.4) globalid (1.3.0) activesupport (>= 6.1) + httparty (0.23.2) + csv + mini_mime (>= 1.0.0) + multi_xml (>= 0.5.2) i18n (1.14.7) concurrent-ruby (~> 1.0) image_processing (1.14.0) @@ -168,11 +173,14 @@ GEM net-smtp marcel (1.1.0) matrix (0.4.3) + maxmind-db (1.3.2) mini_magick (5.3.1) logger mini_mime (1.1.5) minitest (5.26.0) msgpack (1.8.0) + multi_xml (0.7.2) + bigdecimal (~> 3.1) net-imap (0.5.12) date net-protocol @@ -402,10 +410,12 @@ DEPENDENCIES bundler-audit capybara debug + httparty image_processing (~> 1.2) importmap-rails jbuilder kamal + maxmind-db pagy propshaft puma (>= 5.0) diff --git a/README.md b/README.md index 352e203..e6646ac 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Baffle Hub -**Rails 8 WAF analytics and automated rule management system** +**Rails 8 WAF analytics and automated rule management system** ⚠️ **Experimental** Baffle Hub provides intelligent Web Application Firewall (WAF) analytics with automated rule generation. It combines real-time threat detection with SQLite-based local storage for ultra-fast request filtering. 
@@ -12,12 +12,36 @@ Baffle Hub provides intelligent Web Application Firewall (WAF) analytics with au - **Forward Auth Integration** - Compatible with Caddy, Traefik, and NGINX - **Docker Ready** - Containerized deployment with Kamal +## Status + +### ✅ Complete +- Event ingestion API with DSN authentication +- Comprehensive data normalization (hosts, paths, IPs) +- Basic analytics dashboard +- Background job processing system +- Docker deployment setup + +### 🚧 In Progress +- Rule management framework +- IP range blocking rules +- Country-based blocking (via IP ranges) +- Forward auth endpoint implementation + +### 📋 TODO +- Advanced pattern analysis and threat detection +- Automatic rule generation algorithms +- Rate limiting engine +- Challenge/redirect mechanisms +- Unix socket support for ultra-low latency +- Multi-node rule synchronization +- Advanced analytics visualizations +- Real-time rule updates + ## Quick Start ### Prerequisites - Ruby 3.x -- PostgreSQL 14+ - Docker (optional) ### Installation diff --git a/app/controllers/api/events_controller.rb b/app/controllers/api/events_controller.rb index 7d0ddf7..4169547 100644 --- a/app/controllers/api/events_controller.rb +++ b/app/controllers/api/events_controller.rb @@ -67,7 +67,8 @@ class Api::EventsController < ApplicationController headers = {} important_headers.each do |header| value = request.headers[header] - headers[header] = value if value.present? + # Standardize headers to lower case during import phase + headers[header.downcase] = value if value.present? end headers diff --git a/app/controllers/api/rules_controller.rb b/app/controllers/api/rules_controller.rb new file mode 100644 index 0000000..93f711e --- /dev/null +++ b/app/controllers/api/rules_controller.rb @@ -0,0 +1,67 @@ +# frozen_string_literal: true + +module Api + class RulesController < ApplicationController + skip_before_action :verify_authenticity_token + before_action :authenticate_project! 
+ before_action :check_project_enabled + + # GET /api/:public_key/rules/version + # Quick version check - returns latest updated_at timestamp + def version + render json: { + version: Rule.latest_version, + count: Rule.active.count, + sampling: HubLoad.current_sampling + } + end + + # GET /api/:public_key/rules?since=2024-11-03T12:00:00.000Z + # Incremental sync - returns rules updated since timestamp + # GET /api/:public_key/rules + # Full sync - returns all active rules + def index + rules = if params[:since].present? + # Incremental sync + since_time = parse_timestamp(params[:since]) + Rule.since(since_time) + else + # Full sync - only return enabled rules + Rule.active.sync_order + end + + render json: { + version: Rule.latest_version, + sampling: HubLoad.current_sampling, + rules: rules.map(&:to_agent_format) + } + rescue ArgumentError => e + render json: { error: "Invalid timestamp format: #{e.message}" }, status: :bad_request + end + + private + + def authenticate_project! + public_key = params[:public_key] || params[:project_id] + + @project = Project.find_by(public_key: public_key) + + unless @project + render json: { error: "Invalid project key" }, status: :unauthorized + return + end + end + + def check_project_enabled + unless @project.enabled? + render json: { error: "Project is disabled" }, status: :forbidden + end + end + + def parse_timestamp(timestamp_str) + Time.parse(timestamp_str) + rescue ArgumentError => e + raise ArgumentError, "Invalid timestamp format. Expected ISO8601 format (e.g., 2024-11-03T12:00:00.000Z)" + end + end +end diff --git a/app/jobs/expired_rules_cleanup_job.rb b/app/jobs/expired_rules_cleanup_job.rb new file mode 100644 index 0000000..779b0ba --- /dev/null +++ b/app/jobs/expired_rules_cleanup_job.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +# ExpiredRulesCleanupJob - Disables rules that have expired +# +# This job runs periodically (hourly) to find rules with expires_at in the past +# and disables them. 
Agents will pick up these disabled rules in their next sync +# and remove them from their local evaluation tables. +# +# Schedule: Every hour (configured in initializer or cron) +class ExpiredRulesCleanupJob < ApplicationJob + queue_as :default + + def perform + expired_rules = Rule.enabled.expired + + Rails.logger.info "ExpiredRulesCleanupJob: Found #{expired_rules.count} expired rules" + + return if expired_rules.empty? + + # Disable all expired rules in a single update + count = expired_rules.update_all( + enabled: false, + updated_at: Time.current + ) + + Rails.logger.info "ExpiredRulesCleanupJob: Disabled #{count} expired rules" + + # Optionally: Clean up old disabled rules after a retention period + cleanup_old_disabled_rules if should_cleanup_old_rules? + + count + end + + private + + def should_cleanup_old_rules? + # Only cleanup on first hour of the day to avoid too frequent deletion + Time.current.hour == 1 + end + + def cleanup_old_disabled_rules + # Delete disabled rules older than 30 days (keep for audit trail) + old_rules = Rule.disabled.where("updated_at < ?", 30.days.ago) + + if old_rules.any? + count = old_rules.delete_all + Rails.logger.info "ExpiredRulesCleanupJob: Deleted #{count} old disabled rules (>30 days)" + end + end +end diff --git a/app/jobs/path_scanner_detector_job.rb b/app/jobs/path_scanner_detector_job.rb new file mode 100644 index 0000000..9b13251 --- /dev/null +++ b/app/jobs/path_scanner_detector_job.rb @@ -0,0 +1,117 @@ +# frozen_string_literal: true + +# PathScannerDetectorJob - Detects IPs hitting scanner paths and auto-bans them +# +# This job analyzes recent events to find IPs hitting common scanner/bot paths +# like /.env, /.git, /wp-admin, etc. When detected, it creates temporary IP +# block rules that expire after 24 hours. 
+# +# Schedule: Every 5 minutes (configured in initializer or cron) +class PathScannerDetectorJob < ApplicationJob + queue_as :default + + # Common paths that scanners/bots hit + SCANNER_PATHS = %w[ + /.env + /.git + /wp-admin + /wp-login.php + /phpMyAdmin + /phpmyadmin + /.aws + /.ssh + /admin + /administrator + /.config + /backup + /db_backup + /.DS_Store + /web.config + ].freeze + + # Minimum hits to be considered a scanner + MIN_SCANNER_HITS = 3 + + # Look back window + LOOKBACK_WINDOW = 5.minutes + + # Ban duration + BAN_DURATION = 24.hours + + def perform + scanner_ips = detect_scanner_ips + + Rails.logger.info "PathScannerDetectorJob: Found #{scanner_ips.count} scanner IPs" + + scanner_ips.each do |ip_data| + create_ban_rule(ip_data) + end + + scanner_ips.count + end + + private + + def detect_scanner_ips + # Find IPs that have hit scanner paths multiple times recently + Event + .where("timestamp > ?", LOOKBACK_WINDOW.ago) + .where("request_path IN (?)", SCANNER_PATHS) + .group(:ip_address) + .select("ip_address, COUNT(*) as hit_count, GROUP_CONCAT(DISTINCT request_path) as paths") + .having("COUNT(*) >= ?", MIN_SCANNER_HITS) + .map do |event| + { + ip: event.ip_address, + hit_count: event.hit_count, + paths: event.paths.to_s.split(",") + } + end + end + + def create_ban_rule(ip_data) + ip = ip_data[:ip] + + # Check if rule already exists for this IP + existing_rule = Rule.active.network_rules.find_by( + "conditions ->> 'cidr' = ?", "#{ip}/32" + ) + + if existing_rule + Rails.logger.info "PathScannerDetectorJob: Rule already exists for #{ip}, skipping" + return + end + + # Determine if IPv4 or IPv6 + addr = IPAddr.new(ip) + rule_type = addr.ipv4? ? 
"network_v4" : "network_v6" + + # Create the ban rule + rule = Rule.create!( + rule_type: rule_type, + action: "deny", + conditions: { cidr: "#{ip}/32" }, + priority: 32, + expires_at: BAN_DURATION.from_now, + source: "auto:scanner_detected", + enabled: true, + metadata: { + reason: "Scanner detected: hit #{ip_data[:paths].join(', ')}", + hit_count: ip_data[:hit_count], + paths: ip_data[:paths], + detected_at: Time.current.iso8601, + auto_generated: true + } + ) + + Rails.logger.info "PathScannerDetectorJob: Created ban rule #{rule.id} for #{ip} (expires: #{rule.expires_at})" + + rule + rescue IPAddr::InvalidAddressError => e + Rails.logger.error "PathScannerDetectorJob: Invalid IP address #{ip}: #{e.message}" + nil + rescue ActiveRecord::RecordInvalid => e + Rails.logger.error "PathScannerDetectorJob: Failed to create rule for #{ip}: #{e.message}" + nil + end +end diff --git a/app/jobs/process_waf_event_job.rb b/app/jobs/process_waf_event_job.rb index 2897147..adce6fb 100644 --- a/app/jobs/process_waf_event_job.rb +++ b/app/jobs/process_waf_event_job.rb @@ -28,6 +28,15 @@ class ProcessWafEventJob < ApplicationJob # Create the WAF event record event = Event.create_from_waf_payload!(event_id, single_event_data, project) + # Enrich with geo-location data if missing + if event.ip_address.present? && event.country_code.blank? + begin + event.enrich_geo_location! 
+ rescue => e + Rails.logger.warn "Failed to enrich geo location for event #{event.id}: #{e.message}" + end + end + # Trigger analytics processing ProcessWafAnalyticsJob.perform_later(project_id: project_id, event_id: event.id) diff --git a/app/jobs/update_geo_ip_database_job.rb b/app/jobs/update_geo_ip_database_job.rb new file mode 100644 index 0000000..2fbc8ff --- /dev/null +++ b/app/jobs/update_geo_ip_database_job.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +class UpdateGeoIpDatabaseJob < ApplicationJob + queue_as :default + + # Schedule this job to run weekly to keep the GeoIP database updated + # Use: UpdateGeoIpDatabaseJob.set(wait: 1.week).perform_later + # Or set up in config/schedule.rb for recurring execution + + def perform(force_update: false) + return unless auto_update_enabled? + + Rails.logger.info "Starting GeoIP database update check" + + if should_update_database? || force_update + success = GeoIpService.update_database! + + if success + Rails.logger.info "GeoIP database successfully updated" + else + Rails.logger.error "Failed to update GeoIP database" + end + else + Rails.logger.info "GeoIP database is up to date, no update needed" + end + + # No cleanup needed with file-system approach + rescue => e + Rails.logger.error "Error in UpdateGeoIpDatabaseJob: #{e.message}" + Rails.logger.error e.backtrace.join("\n") + end + + private + + def auto_update_enabled? + Rails.application.config.maxmind.auto_update_enabled + end + + def should_update_database? 
+ config = Rails.application.config.maxmind + database_path = default_database_path + + # Check if database file exists + return true unless File.exist?(database_path) + + # Check if database is outdated + max_age_days = config.max_age_days + file_mtime = File.mtime(database_path) + return true if file_mtime < max_age_days.days.ago + + # Check if database file is readable and valid + begin + # Try to open the database to verify it's valid + MaxMind::DB.new(database_path) + false + rescue => e + Rails.logger.warn "GeoIP database file appears to be corrupted: #{e.message}" + true + end + end + + def default_database_path + config = Rails.application.config.maxmind + File.join(config.storage_path, config.database_filename) + end +end \ No newline at end of file diff --git a/app/models/event.rb b/app/models/event.rb index 27f3b6d..37a64ea 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -88,40 +88,63 @@ class Event < ApplicationRecord after_validation :normalize_event_fields, if: :should_normalize? 
def self.create_from_waf_payload!(event_id, payload, project) + # Normalize headers in payload during import phase + normalized_payload = normalize_payload_headers(payload) + # Create the WAF request event create!( project: project, event_id: event_id, - timestamp: parse_timestamp(payload["timestamp"]), - payload: payload, + timestamp: parse_timestamp(normalized_payload["timestamp"]), + payload: normalized_payload, # WAF-specific fields - ip_address: payload.dig("request", "ip"), - user_agent: payload.dig("request", "headers", "User-Agent"), - request_method: payload.dig("request", "method")&.downcase, - request_path: payload.dig("request", "path"), - request_url: payload.dig("request", "url"), - request_protocol: payload.dig("request", "protocol"), - response_status: payload.dig("response", "status_code"), - response_time_ms: payload.dig("response", "duration_ms"), - waf_action: normalize_action(payload["waf_action"]), # Normalize incoming action values - rule_matched: payload["rule_matched"], - blocked_reason: payload["blocked_reason"], + ip_address: normalized_payload.dig("request", "ip"), + user_agent: normalized_payload.dig("request", "headers", "user-agent") || normalized_payload.dig("request", "headers", "User-Agent"), + # request_method will be set by extract_fields_from_payload + normalize_event_fields + request_path: normalized_payload.dig("request", "path"), + request_url: normalized_payload.dig("request", "url"), + # request_protocol will be set by extract_fields_from_payload + normalize_event_fields + response_status: normalized_payload.dig("response", "status_code"), + response_time_ms: normalized_payload.dig("response", "duration_ms"), + waf_action: normalize_action(normalized_payload["waf_action"]), # Normalize incoming action values + rule_matched: normalized_payload["rule_matched"], + blocked_reason: normalized_payload["blocked_reason"], # Server/Environment info - server_name: payload["server_name"], - environment: payload["environment"], + 
server_name: normalized_payload["server_name"], + environment: normalized_payload["environment"], # Geographic data - country_code: payload.dig("geo", "country_code"), - city: payload.dig("geo", "city"), + country_code: normalized_payload.dig("geo", "country_code"), + city: normalized_payload.dig("geo", "city"), # WAF agent info - agent_version: payload.dig("agent", "version"), - agent_name: payload.dig("agent", "name") + agent_version: normalized_payload.dig("agent", "version"), + agent_name: normalized_payload.dig("agent", "name") ) end + # Normalize headers in payload to lower case during import phase + def self.normalize_payload_headers(payload) + return payload unless payload.is_a?(Hash) + + # Create a deep copy to avoid modifying the original + normalized = Marshal.load(Marshal.dump(payload)) + + # Normalize request headers + if normalized.dig("request", "headers")&.is_a?(Hash) + normalized["request"]["headers"] = normalized["request"]["headers"].transform_keys(&:downcase) + end + + # Normalize response headers if they exist + if normalized.dig("response", "headers")&.is_a?(Hash) + normalized["response"]["headers"] = normalized["response"]["headers"].transform_keys(&:downcase) + end + + normalized + end + def self.normalize_action(action) return "allow" if action.nil? || action.blank? @@ -195,7 +218,8 @@ class Event < ApplicationRecord end def headers - payload&.dig("request", "headers") || {} + raw_headers = payload&.dig("request", "headers") || {} + normalize_headers(raw_headers) end def query_params @@ -237,6 +261,69 @@ class Event < ApplicationRecord URI.parse(request_url).hostname rescue nil end + # Normalize headers to lower case keys during import phase + def normalize_headers(headers) + return {} unless headers.is_a?(Hash) + + headers.transform_keys(&:downcase) + end + + # GeoIP enrichment methods + def enrich_geo_location! + return if ip_address.blank? + return if country_code.present? 
# Already has geo data + + country = GeoIpService.lookup_country(ip_address) + update!(country_code: country) if country.present? + rescue => e + Rails.logger.error "Failed to enrich geo location for event #{id}: #{e.message}" + end + + # Class method to enrich multiple events + def self.enrich_geo_location_batch(events = nil) + events ||= where(country_code: [nil, '']).where.not(ip_address: [nil, '']) + geo_service = GeoIpService.new + updated_count = 0 + + events.find_each do |event| + next if event.country_code.present? + + country = geo_service.lookup_country(event.ip_address) + if country.present? + event.update!(country_code: country) + updated_count += 1 + end + end + + updated_count + end + + # Lookup country code for this event's IP + def lookup_country + return country_code if country_code.present? + return nil if ip_address.blank? + + GeoIpService.lookup_country(ip_address) + rescue => e + Rails.logger.error "GeoIP lookup failed for #{ip_address}: #{e.message}" + nil + end + + # Check if event has valid geo location data + def has_geo_data? + country_code.present? || city.present? + end + + # Get full geo location details + def geo_location + { + country_code: country_code, + city: city, + ip_address: ip_address, + has_data: has_geo_data? + } + end + private def should_normalize? 
@@ -257,7 +344,12 @@ class Event < ApplicationRecord response_data = payload.dig("response") || {} self.ip_address = request_data["ip"] - self.user_agent = request_data.dig("headers", "User-Agent") + + # Extract user agent with header name standardization + headers = request_data["headers"] || {} + normalized_headers = normalize_headers(headers) + self.user_agent = normalized_headers["user-agent"] || normalized_headers["User-Agent"] + self.request_path = request_data["path"] self.request_url = request_data["url"] self.response_status = response_data["status_code"] @@ -265,10 +357,11 @@ class Event < ApplicationRecord self.rule_matched = payload["rule_matched"] self.blocked_reason = payload["blocked_reason"] - # Store original values for normalization (these will be normalized to IDs) - @raw_request_method = request_data["method"] - @raw_request_protocol = request_data["protocol"] - @raw_action = payload["waf_action"] + # Store original values for normalization only if they don't exist yet + # This prevents overwriting during multiple callback runs + @raw_request_method ||= request_data["method"] + @raw_request_protocol ||= request_data["protocol"] + @raw_action ||= payload["waf_action"] # Extract server/environment info self.server_name = payload["server_name"] diff --git a/app/models/ipv4_range.rb b/app/models/ipv4_range.rb new file mode 100644 index 0000000..2bb8b0a --- /dev/null +++ b/app/models/ipv4_range.rb @@ -0,0 +1,171 @@ +# frozen_string_literal: true + +# Ipv4Range - Stores IPv4 network ranges with IP intelligence metadata +# +# Optimized for fast range lookups using network_start/network_end integers. +# Stores metadata about IP ranges including ASN, company, geographic info, +# and flags for datacenter/proxy/VPN detection. 
+class Ipv4Range < ApplicationRecord + # Validations + validates :network_start, presence: true + validates :network_end, presence: true + validates :network_prefix, presence: true, + numericality: { greater_than_or_equal_to: 0, less_than_or_equal_to: 32 } + + # Callbacks + before_validation :calculate_range, if: -> { cidr.present? } + + # Scopes for common queries + scope :datacenter, -> { where(is_datacenter: true) } + scope :proxy, -> { where(is_proxy: true) } + scope :vpn, -> { where(is_vpn: true) } + scope :by_country, ->(country) { where(ip_api_country: country) } + scope :by_company, ->(company) { where(company: company) } + scope :by_asn, ->(asn) { where(asn: asn) } + + # Virtual attribute for setting IP via CIDR notation + attr_accessor :cidr + + # Find ranges that contain a specific IPv4 address + def self.contains_ip(ip_string) + ip_addr = IPAddr.new(ip_string) + raise ArgumentError, "Not an IPv4 address" unless ip_addr.ipv4? + + ip_int = ip_addr.to_i + + where("? BETWEEN network_start AND network_end", ip_int) + .order(network_prefix: :desc) # Most specific first + end + + # Check if this range contains a specific IP + def contains_ip?(ip_string) + ip_addr = IPAddr.new(ip_string) + return false unless ip_addr.ipv4? + + ip_int = ip_addr.to_i + ip_int >= network_start && ip_int <= network_end + end + + # Get CIDR notation for this range + def to_cidr + return nil unless network_start.present? + + ip_addr = IPAddr.new(network_start, Socket::AF_INET) + "#{ip_addr}/#{network_prefix}" + end + + # String representation + def to_s + to_cidr || "Ipv4Range##{id}" + end + + # Convenience methods for JSON fields + def abuser_scores_hash + abuser_scores ? JSON.parse(abuser_scores) : {} + rescue JSON::ParserError + {} + end + + def abuser_scores_hash=(hash) + self.abuser_scores = hash.to_json + end + + def additional_data_hash + additional_data ? 
JSON.parse(additional_data) : {} + rescue JSON::ParserError + {} + end + + def additional_data_hash=(hash) + self.additional_data = hash.to_json + end + + # GeoIP lookup methods + def geo_lookup_country! + return if ip_api_country.present? || geo2_country.present? + + # Use the first IP in the range for lookup + sample_ip = IPAddr.new(network_start, Socket::AF_INET).to_s + country = GeoIpService.lookup_country(sample_ip) + + if country.present? + # Update both country fields for redundancy + update!(ip_api_country: country, geo2_country: country) + country + end + rescue => e + Rails.logger.error "Failed to lookup geo location for IPv4 range #{id}: #{e.message}" + nil + end + + def geo_lookup_country + return ip_api_country if ip_api_country.present? + return geo2_country if geo2_country.present? + + # Use the first IP in the range for lookup + sample_ip = IPAddr.new(network_start, Socket::AF_INET).to_s + GeoIpService.lookup_country(sample_ip) + rescue => e + Rails.logger.error "Failed to lookup geo location for IPv4 range #{id}: #{e.message}" + nil + end + + # Check if this range has any country information + def has_country_info? + ip_api_country.present? || geo2_country.present? + end + + # Get the best available country code + def primary_country + ip_api_country || geo2_country + end + + # Class method to lookup country for any IP in the range + def self.lookup_country_by_ip(ip_string) + range = contains_ip(ip_string).first + return nil unless range + + range.geo_lookup_country + end + + # Class method to enrich ranges without country data + def self.enrich_missing_geo_data(limit: 1000) + ranges_without_geo = where(ip_api_country: [nil, ''], geo2_country: [nil, '']) + .limit(limit) + + updated_count = 0 + geo_service = GeoIpService.new + + ranges_without_geo.find_each do |range| + country = geo_service.lookup_country(IPAddr.new(range.network_start, Socket::AF_INET).to_s) + if country.present? 
+ range.update!(ip_api_country: country, geo2_country: country) + updated_count += 1 + end + end + + updated_count + end + + private + + # Calculate network_start and network_end from CIDR notation + def calculate_range + return unless cidr.present? + + ip_addr = IPAddr.new(cidr) + raise ArgumentError, "Not an IPv4 CIDR" unless ip_addr.ipv4? + + # Get prefix from CIDR + self.network_prefix = cidr.split("/").last.to_i + + # Calculate network range + first_ip = ip_addr.to_range.first + last_ip = ip_addr.to_range.last + + self.network_start = first_ip.to_i + self.network_end = last_ip.to_i + rescue IPAddr::InvalidAddressError => e + errors.add(:cidr, "invalid IPv4 CIDR notation: #{e.message}") + end +end diff --git a/app/models/ipv6_range.rb b/app/models/ipv6_range.rb new file mode 100644 index 0000000..2f8226f --- /dev/null +++ b/app/models/ipv6_range.rb @@ -0,0 +1,171 @@ +# frozen_string_literal: true + +# Ipv6Range - Stores IPv6 network ranges with IP intelligence metadata +# +# Optimized for fast range lookups using network_start/network_end binary storage. +# Stores metadata about IP ranges including ASN, company, geographic info, +# and flags for datacenter/proxy/VPN detection. +class Ipv6Range < ApplicationRecord + # Validations + validates :network_start, presence: true + validates :network_end, presence: true + validates :network_prefix, presence: true, + numericality: { greater_than_or_equal_to: 0, less_than_or_equal_to: 128 } + + # Callbacks + before_validation :calculate_range, if: -> { cidr.present? 
} + + # Scopes for common queries + scope :datacenter, -> { where(is_datacenter: true) } + scope :proxy, -> { where(is_proxy: true) } + scope :vpn, -> { where(is_vpn: true) } + scope :by_country, ->(country) { where(ip_api_country: country) } + scope :by_company, ->(company) { where(company: company) } + scope :by_asn, ->(asn) { where(asn: asn) } + + # Virtual attribute for setting IP via CIDR notation + attr_accessor :cidr + + # Find ranges that contain a specific IPv6 address + def self.contains_ip(ip_string) + ip_addr = IPAddr.new(ip_string) + raise ArgumentError, "Not an IPv6 address" unless ip_addr.ipv6? + + ip_bytes = ip_addr.hton + + where("? BETWEEN network_start AND network_end", ip_bytes) + .order(network_prefix: :desc) # Most specific first + end + + # Check if this range contains a specific IP + def contains_ip?(ip_string) + ip_addr = IPAddr.new(ip_string) + return false unless ip_addr.ipv6? + + ip_bytes = ip_addr.hton + ip_bytes >= network_start && ip_bytes <= network_end + end + + # Get CIDR notation for this range + def to_cidr + return nil unless network_start.present? + + ip_addr = IPAddr.new_ntoh(network_start) + "#{ip_addr}/#{network_prefix}" + end + + # String representation + def to_s + to_cidr || "Ipv6Range##{id}" + end + + # Convenience methods for JSON fields + def abuser_scores_hash + abuser_scores ? JSON.parse(abuser_scores) : {} + rescue JSON::ParserError + {} + end + + def abuser_scores_hash=(hash) + self.abuser_scores = hash.to_json + end + + def additional_data_hash + additional_data ? JSON.parse(additional_data) : {} + rescue JSON::ParserError + {} + end + + def additional_data_hash=(hash) + self.additional_data = hash.to_json + end + + # GeoIP lookup methods + def geo_lookup_country! + return if ip_api_country.present? || geo2_country.present? + + # Use the first IP in the range for lookup + sample_ip = IPAddr.new_ntoh(network_start).to_s + country = GeoIpService.lookup_country(sample_ip) + + if country.present? 
+ # Update both country fields for redundancy + update!(ip_api_country: country, geo2_country: country) + country + end + rescue => e + Rails.logger.error "Failed to lookup geo location for IPv6 range #{id}: #{e.message}" + nil + end + + def geo_lookup_country + return ip_api_country if ip_api_country.present? + return geo2_country if geo2_country.present? + + # Use the first IP in the range for lookup + sample_ip = IPAddr.new_ntoh(network_start).to_s + GeoIpService.lookup_country(sample_ip) + rescue => e + Rails.logger.error "Failed to lookup geo location for IPv6 range #{id}: #{e.message}" + nil + end + + # Check if this range has any country information + def has_country_info? + ip_api_country.present? || geo2_country.present? + end + + # Get the best available country code + def primary_country + ip_api_country || geo2_country + end + + # Class method to lookup country for any IP in the range + def self.lookup_country_by_ip(ip_string) + range = contains_ip(ip_string).first + return nil unless range + + range.geo_lookup_country + end + + # Class method to enrich ranges without country data + def self.enrich_missing_geo_data(limit: 1000) + ranges_without_geo = where(ip_api_country: [nil, ''], geo2_country: [nil, '']) + .limit(limit) + + updated_count = 0 + geo_service = GeoIpService.new + + ranges_without_geo.find_each do |range| + country = geo_service.lookup_country(IPAddr.new_ntoh(range.network_start).to_s) + if country.present? + range.update!(ip_api_country: country, geo2_country: country) + updated_count += 1 + end + end + + updated_count + end + + private + + # Calculate network_start and network_end from CIDR notation + def calculate_range + return unless cidr.present? + + ip_addr = IPAddr.new(cidr) + raise ArgumentError, "Not an IPv6 CIDR" unless ip_addr.ipv6? 
+ + # Get prefix from CIDR + self.network_prefix = cidr.split("/").last.to_i + + # Calculate network range (binary format for IPv6) + first_ip = ip_addr.to_range.first + last_ip = ip_addr.to_range.last + + self.network_start = first_ip.hton + self.network_end = last_ip.hton + rescue IPAddr::InvalidAddressError => e + errors.add(:cidr, "invalid IPv6 CIDR notation: #{e.message}") + end +end diff --git a/app/models/network_range.rb b/app/models/network_range.rb deleted file mode 100644 index 9286f9e..0000000 --- a/app/models/network_range.rb +++ /dev/null @@ -1,63 +0,0 @@ -class NetworkRange < ApplicationRecord - validates :ip_address, presence: true - validates :network_prefix, presence: true, numericality: {greater_than_or_equal_to: 0, less_than_or_equal_to: 128} - validates :ip_version, presence: true, inclusion: {in: [4, 6]} - - # Convenience methods for JSON fields - def abuser_scores_hash - abuser_scores ? JSON.parse(abuser_scores) : {} - end - - def abuser_scores_hash=(hash) - self.abuser_scores = hash.to_json - end - - def additional_data_hash - additional_data ? JSON.parse(additional_data) : {} - end - - def additional_data_hash=(hash) - self.additional_data = hash.to_json - end - - # Scope methods for common queries - scope :ipv4, -> { where(ip_version: 4) } - scope :ipv6, -> { where(ip_version: 6) } - scope :datacenter, -> { where(is_datacenter: true) } - scope :proxy, -> { where(is_proxy: true) } - scope :vpn, -> { where(is_vpn: true) } - scope :by_country, ->(country) { where(ip_api_country: country) } - scope :by_company, ->(company) { where(company: company) } - scope :by_asn, ->(asn) { where(asn: asn) } - - # Find network ranges that contain a specific IP address - def self.contains_ip(ip_string) - ip_bytes = IPAddr.new(ip_string).hton - version = ip_string.include?(":") ? 
6 : 4 - - where(ip_version: version).select do |range| - range.contains_ip_bytes?(ip_bytes) - end - end - - def contains_ip?(ip_string) - contains_ip_bytes?(IPAddr.new(ip_string).hton) - end - - def to_s - "#{ip_address_to_s}/#{network_prefix}" - end - - private - - def contains_ip_bytes?(ip_bytes) - # This is a simplified version - you'll need proper network math here - # For now, just check if the IP matches exactly - ip_address == ip_bytes - end - - def ip_address_to_s - # Convert binary IP back to string representation - IPAddr.ntop(ip_address) - end -end diff --git a/app/models/rule.rb b/app/models/rule.rb index d47010e..8e37a73 100644 --- a/app/models/rule.rb +++ b/app/models/rule.rb @@ -1,126 +1,189 @@ # frozen_string_literal: true class Rule < ApplicationRecord - belongs_to :rule_set + # Rule types for the new architecture + RULE_TYPES = %w[network_v4 network_v6 rate_limit path_pattern].freeze + ACTIONS = %w[allow deny rate_limit redirect log].freeze + SOURCES = %w[manual auto:scanner_detected auto:rate_limit_exceeded auto:bot_detected imported default].freeze - validates :rule_type, presence: true, inclusion: { in: RuleSet::RULE_TYPES } - validates :target, presence: true - validates :action, presence: true, inclusion: { in: RuleSet::ACTIONS } - validates :priority, presence: true, numericality: { greater_than: 0 } + # Validations + validates :rule_type, presence: true, inclusion: { in: RULE_TYPES } + validates :action, presence: true, inclusion: { in: ACTIONS } + validates :conditions, presence: true + validates :enabled, inclusion: { in: [true, false] } + # Custom validations based on rule type + validate :validate_conditions_by_type + validate :validate_metadata_by_action + + # Scopes scope :enabled, -> { where(enabled: true) } - scope :by_priority, -> { order(priority: :desc, created_at: :desc) } - scope :expired, -> { where("expires_at < ?", Time.current) } - scope :not_expired, -> { where("expires_at IS NULL OR expires_at > ?", Time.current) } + 
scope :disabled, -> { where(enabled: false) } + scope :active, -> { enabled.where("expires_at IS NULL OR expires_at > ?", Time.current) } + scope :expired, -> { where("expires_at IS NOT NULL AND expires_at <= ?", Time.current) } + scope :by_type, ->(type) { where(rule_type: type) } + scope :network_rules, -> { where(rule_type: ["network_v4", "network_v6"]) } + scope :rate_limit_rules, -> { where(rule_type: "rate_limit") } + scope :path_pattern_rules, -> { where(rule_type: "path_pattern") } + scope :by_source, ->(source) { where(source: source) } + + # Sync queries (ordered by updated_at for incremental sync) + scope :since, ->(timestamp) { where("updated_at >= ?", timestamp - 0.5.seconds).order(:updated_at, :id) } + scope :sync_order, -> { order(:updated_at, :id) } + + # Callbacks + before_validation :set_defaults + before_save :calculate_priority_from_cidr # Check if rule is currently active def active? - enabled? && (expires_at.nil? || expires_at > Time.current) + enabled? && !expired? end - # Check if rule matches given request context - def matches?(context) - return false unless active? - - case rule_type - when 'ip' - match_ip_rule?(context) - when 'cidr' - match_cidr_rule?(context) - when 'path' - match_path_rule?(context) - when 'user_agent' - match_user_agent_rule?(context) - when 'parameter' - match_parameter_rule?(context) - when 'method' - match_method_rule?(context) - when 'country' - match_country_rule?(context) - else - false - end + def expired? + expires_at.present? && expires_at <= Time.current end - def to_waf_format + # Convert to format for agent consumption + def to_agent_format { id: id, - type: rule_type, - target: target, + rule_type: rule_type, action: action, conditions: conditions || {}, priority: priority, - expires_at: expires_at, - active: active? 
+ expires_at: expires_at&.iso8601, + enabled: enabled, + source: source, + metadata: metadata || {}, + created_at: created_at.iso8601, + updated_at: updated_at.iso8601 } end + # Class method to get latest version (for sync cursor) + def self.latest_version + maximum(:updated_at)&.iso8601(6) || Time.current.iso8601(6) + end + + # Disable rule (soft delete) + def disable!(reason: nil) + update!( + enabled: false, + metadata: (metadata || {}).merge( + disabled_at: Time.current.iso8601, + disabled_reason: reason + ) + ) + end + + # Enable rule + def enable! + update!(enabled: true) + end + + # Check if this is a network rule + def network_rule? + rule_type.in?(%w[network_v4 network_v6]) + end + + # Get CIDR from conditions (for network rules) + def cidr + conditions&.dig("cidr") if network_rule? + end + + # Get prefix length from CIDR + def prefix_length + return nil unless cidr + cidr.split("/").last.to_i + end + private - def match_ip_rule?(context) - return false unless context[:ip_address] - - target == context[:ip_address] + def set_defaults + self.enabled = true if enabled.nil? + self.conditions ||= {} + self.metadata ||= {} + self.source ||= "manual" end - def match_cidr_rule?(context) - return false unless context[:ip_address] + def calculate_priority_from_cidr + # For network rules, priority is the prefix length (more specific = higher priority) + if network_rule? && cidr.present? + self.priority = prefix_length + end + end + def validate_conditions_by_type + case rule_type + when "network_v4", "network_v6" + validate_network_conditions + when "rate_limit" + validate_rate_limit_conditions + when "path_pattern" + validate_path_pattern_conditions + end + end + + def validate_network_conditions + cidr_value = conditions&.dig("cidr") + + if cidr_value.blank? 
+ errors.add(:conditions, "must include 'cidr' for network rules") + return + end + + # Validate CIDR format begin - range = IPAddr.new(target) - range.include?(context[:ip_address]) - rescue IPAddr::InvalidAddressError - false + addr = IPAddr.new(cidr_value) + + # Check IPv4 vs IPv6 matches rule_type + if rule_type == "network_v4" && !addr.ipv4? + errors.add(:conditions, "cidr must be IPv4 for network_v4 rules") + elsif rule_type == "network_v6" && !addr.ipv6? + errors.add(:conditions, "cidr must be IPv6 for network_v6 rules") + end + rescue IPAddr::InvalidAddressError => e + errors.add(:conditions, "invalid CIDR format: #{e.message}") end end - def match_path_rule?(context) - return false unless context[:request_path] + def validate_rate_limit_conditions + scope = conditions&.dig("scope") + cidr_value = conditions&.dig("cidr") - # Support exact match and regex patterns - if conditions&.dig('regex') == true - Regexp.new(target).match?(context[:request_path]) - else - context[:request_path].start_with?(target) + if scope.blank? + errors.add(:conditions, "must include 'scope' for rate_limit rules") + end + + if cidr_value.blank? + errors.add(:conditions, "must include 'cidr' for rate_limit rules") + end + + # Validate metadata has rate limit config + unless metadata&.dig("limit").present? && metadata&.dig("window").present? + errors.add(:metadata, "must include 'limit' and 'window' for rate_limit rules") end end - def match_user_agent_rule?(context) - return false unless context[:user_agent] + def validate_path_pattern_conditions + patterns = conditions&.dig("patterns") - # Support exact match and regex patterns - if conditions&.dig('regex') == true - Regexp.new(target, Regexp::IGNORECASE).match?(context[:user_agent]) - else - context[:user_agent].downcase.include?(target.downcase) + if patterns.blank? 
|| !patterns.is_a?(Array) + errors.add(:conditions, "must include 'patterns' array for path_pattern rules") end end - def match_parameter_rule?(context) - return false unless context[:query_params] - - param_name = conditions&.dig('parameter_name') || target - param_value = context[:query_params][param_name] - - return false unless param_value - - # Check if parameter value matches pattern - if conditions&.dig('regex') == true - Regexp.new(target, Regexp::IGNORECASE).match?(param_value.to_s) - else - param_value.to_s.downcase.include?(target.downcase) + def validate_metadata_by_action + case action + when "redirect" + unless metadata&.dig("redirect_url").present? + errors.add(:metadata, "must include 'redirect_url' for redirect action") + end + when "rate_limit" + unless metadata&.dig("limit").present? && metadata&.dig("window").present? + errors.add(:metadata, "must include 'limit' and 'window' for rate_limit action") + end end end - - def match_method_rule?(context) - return false unless context[:request_method] - - target.upcase == context[:request_method].upcase - end - - def match_country_rule?(context) - return false unless context[:country_code] - - target.upcase == context[:country_code].upcase - end end diff --git a/app/services/event_normalizer.rb b/app/services/event_normalizer.rb index df89068..d2190e0 100644 --- a/app/services/event_normalizer.rb +++ b/app/services/event_normalizer.rb @@ -47,7 +47,7 @@ class EventNormalizer else :allow end - @event.action = action_enum + @event.waf_action = action_enum end def normalize_method diff --git a/app/services/geo_ip_service.rb b/app/services/geo_ip_service.rb new file mode 100644 index 0000000..e12dc0f --- /dev/null +++ b/app/services/geo_ip_service.rb @@ -0,0 +1,174 @@ +# frozen_string_literal: true + +require 'maxmind/db' +require 'httparty' +require 'digest' +require 'tmpdir' +require 'fileutils' + +class GeoIpService + include HTTParty + + class DatabaseNotAvailable < StandardError; end + class 
InvalidIpAddress < StandardError; end + + attr_reader :database_reader, :database_path + + def initialize(database_path: nil) + @database_path = database_path || default_database_path + @database_reader = nil + load_database if File.exist?(@database_path) + end + + # Main lookup method - returns country code for IP address + def lookup_country(ip_address) + return fallback_country unless database_available? + return fallback_country unless valid_ip?(ip_address) + + result = database_reader.get(ip_address) + return fallback_country if result.nil? || result.empty? + + # Extract country code from MaxMind result + result['country']&.[]('iso_code') || fallback_country + rescue => e + Rails.logger.error "GeoIP lookup failed for #{ip_address}: #{e.message}" + fallback_country + end + + # Check if database is available and loaded + def database_available? + return false unless File.exist?(@database_path) + + load_database unless database_reader + database_reader.present? + end + + # Get database information + def database_info + return nil unless File.exist?(@database_path) + + file_stat = File.stat(@database_path) + metadata = database_reader&.metadata + + if metadata + { + type: 'GeoLite2-Country', + version: "#{metadata.binary_format_major_version}.#{metadata.binary_format_minor_version}", + size: file_stat.size, + modified_at: file_stat.mtime, + age_days: ((Time.current - file_stat.mtime) / 1.day).round, + file_path: @database_path + } + else + { + type: 'GeoLite2-Country', + version: 'Unknown', + size: file_stat.size, + modified_at: file_stat.mtime, + age_days: ((Time.current - file_stat.mtime) / 1.day).round, + file_path: @database_path + } + end + end + + # Class method for convenience lookup + def self.lookup_country(ip_address) + new.lookup_country(ip_address) + end + + # Update database from remote source + def self.update_database! + new.update_from_remote! + end + + # Download and install database from remote URL + def update_from_remote! 
+ config = Rails.application.config.maxmind + database_url = config.database_url + storage_path = config.storage_path + database_filename = config.database_filename + temp_file = nil + + Rails.logger.info "Starting GeoIP database download from #{database_url}" + + begin + # Ensure storage directory exists + FileUtils.mkdir_p(storage_path) unless Dir.exist?(storage_path) + + # Download to temporary file + Dir.mktmpdir do |temp_dir| + temp_file = File.join(temp_dir, database_filename) + + response = HTTParty.get(database_url, timeout: 60) + raise "Failed to download database: #{response.code}" unless response.success? + + File.binwrite(temp_file, response.body) + + # Validate downloaded file + validate_downloaded_file(temp_file) + + # Move to final location + final_path = File.join(storage_path, database_filename) + File.rename(temp_file, final_path) + + # Reload the database with new file + @database_reader = nil + load_database + + Rails.logger.info "GeoIP database successfully updated: #{final_path}" + return true + end + rescue => e + Rails.logger.error "Failed to update GeoIP database: #{e.message}" + File.delete(temp_file) if temp_file && File.exist?(temp_file) + false + end + end + + private + + def load_database + return unless File.exist?(@database_path) + + @database_reader = MaxMind::DB.new(@database_path) + rescue => e + Rails.logger.error "Failed to load GeoIP database: #{e.message}" + @database_reader = nil + end + + def default_database_path + config = Rails.application.config.maxmind + File.join(config.storage_path, config.database_filename) + end + + def valid_ip?(ip_address) + IPAddr.new(ip_address) + true + rescue IPAddr::InvalidAddressError + false + end + + def fallback_country + config = Rails.application.config.maxmind + return nil unless config.enable_fallback + config.fallback_country + end + + def cache_size + return 0 unless Rails.application.config.maxmind.cache_enabled + Rails.application.config.maxmind.cache_size + end + + def 
validate_downloaded_file(file_path) + # Basic file existence and size check + raise "Downloaded file is empty" unless File.exist?(file_path) + raise "Downloaded file is too small" if File.size(file_path) < 1_000_000 # ~1MB minimum + + # Try to open with MaxMind reader + begin + MaxMind::DB.new(file_path) + rescue => e + raise "Invalid MaxMind database format: #{e.message}" + end + end +end diff --git a/app/services/hub_load.rb b/app/services/hub_load.rb new file mode 100644 index 0000000..8dec59d --- /dev/null +++ b/app/services/hub_load.rb @@ -0,0 +1,78 @@ +# frozen_string_literal: true + +# HubLoad - Calculates dynamic event sampling rate based on system load +# +# This service monitors SolidQueue depth and adjusts sampling rates to prevent +# the Hub from being overwhelmed while ensuring critical events are always captured. +class HubLoad + # Queue depth thresholds + THRESHOLDS = { + normal: 0..1_000, # 100% sampling + moderate: 1_001..5_000, # 50% sampling + high: 5_001..10_000, # 20% sampling + critical: 10_001..Float::INFINITY # 5% sampling + }.freeze + + SAMPLING_RATES = { + normal: { allowed: 1.0, blocked: 1.0, rate_limited: 1.0 }, + moderate: { allowed: 0.5, blocked: 1.0, rate_limited: 1.0 }, + high: { allowed: 0.2, blocked: 1.0, rate_limited: 1.0 }, + critical: { allowed: 0.05, blocked: 1.0, rate_limited: 1.0 } + }.freeze + + # Get current sampling configuration based on load + def self.current_sampling + load_level = calculate_load_level + rates = SAMPLING_RATES[load_level] + + { + allowed_requests: rates[:allowed], + blocked_requests: rates[:blocked], + rate_limited_requests: rates[:rate_limited], + effective_until: next_sync_time, + load_level: load_level, + queue_depth: queue_depth + } + end + + # Calculate when sampling should be rechecked (next agent sync) + def self.next_sync_time + 10.seconds.from_now.iso8601(3) + end + + # Get current queue depth + def self.queue_depth + # SolidQueue stores jobs in the jobs table + # Count pending/running jobs 
only + SolidQueue::Job.where(finished_at: nil).count + rescue StandardError => e + Rails.logger.error "Failed to get queue depth: #{e.message}" + 0 + end + + # Determine load level based on queue depth + def self.calculate_load_level + depth = queue_depth + + THRESHOLDS.each do |level, range| + return level if range.cover?(depth) + end + + :critical # Fallback + end + + # Check if hub is under heavy load + def self.overloaded? + calculate_load_level.in?([:high, :critical]) + end + + # Get load statistics for monitoring + def self.stats + { + queue_depth: queue_depth, + load_level: calculate_load_level, + sampling_rates: SAMPLING_RATES[calculate_load_level], + overloaded: overloaded? + } + end +end diff --git a/app/views/projects/index.html.erb b/app/views/projects/index.html.erb index bd53ccd..676e01e 100644 --- a/app/views/projects/index.html.erb +++ b/app/views/projects/index.html.erb @@ -32,7 +32,7 @@
diff --git a/config/initializers/maxmind.rb b/config/initializers/maxmind.rb new file mode 100644 index 0000000..14dcbce --- /dev/null +++ b/config/initializers/maxmind.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +require 'fileutils' + +# MaxMind GeoIP Configuration +Rails.application.configure do + config.maxmind = ActiveSupport::OrderedOptions.new + + # Database configuration + config.maxmind.database_url = ENV.fetch('MAXMIND_DATABASE_URL', 'https://github.com/P3TERX/GeoLite.mmdb/raw/download/GeoLite2-Country.mmdb') + config.maxmind.database_type = 'GeoLite2-Country' + + # Local storage paths + config.maxmind.storage_path = Rails.root.join('db', 'geoip') + config.maxmind.database_filename = 'GeoLite2-Country.mmdb' + + # Update configuration + config.maxmind.auto_update_enabled = ENV.fetch('MAXMIND_AUTO_UPDATE', 'true').downcase == 'true' + config.maxmind.update_interval_days = ENV.fetch('MAXMIND_UPDATE_INTERVAL_DAYS', 7).to_i + config.maxmind.max_age_days = ENV.fetch('MAXMIND_MAX_AGE_DAYS', 30).to_i + + # Note: MaxMind DB has its own internal caching, no additional caching needed + + # Fallback settings + config.maxmind.fallback_country = ENV.fetch('MAXMIND_FALLBACK_COUNTRY', nil) + config.maxmind.enable_fallback = ENV.fetch('MAXMIND_ENABLE_FALLBACK', 'false').downcase == 'true' +end + +# Ensure storage directory exists +maxmind_storage_path = Rails.application.config.maxmind.storage_path +FileUtils.mkdir_p(maxmind_storage_path) unless Dir.exist?(maxmind_storage_path) \ No newline at end of file diff --git a/config/routes.rb b/config/routes.rb index f5d101b..e35819d 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -5,9 +5,14 @@ Rails.application.routes.draw do # Can be used by load balancers and uptime monitors to verify that the app is live. 
get "up" => "rails/health#show", as: :rails_health_check - # WAF Event Ingestion API + # WAF API namespace :api, defaults: { format: :json } do + # Event ingestion post ":project_id/events", to: "events#create" + + # Rule synchronization + get ":public_key/rules/version", to: "rules#version" + get ":public_key/rules", to: "rules#index" end # Root path - projects dashboard diff --git a/db/migrate/20251103080823_enhance_rules_table_for_sync.rb b/db/migrate/20251103080823_enhance_rules_table_for_sync.rb new file mode 100644 index 0000000..ebb6170 --- /dev/null +++ b/db/migrate/20251103080823_enhance_rules_table_for_sync.rb @@ -0,0 +1,56 @@ +class EnhanceRulesTableForSync < ActiveRecord::Migration[8.1] + def change + # Remove rule_sets relationship (we're skipping rule sets for Phase 1) + if foreign_key_exists?(:rules, :rule_sets) + remove_foreign_key :rules, :rule_sets + end + + if column_exists?(:rules, :rule_set_id) + remove_column :rules, :rule_set_id + end + + change_table :rules do |t| + # Add source field to track rule origin + unless column_exists?(:rules, :source) + t.string :source, limit: 100 + end + + # Ensure core fields exist with proper types + unless column_exists?(:rules, :rule_type) + t.string :rule_type, null: false + end + + unless column_exists?(:rules, :action) + t.string :action, null: false + end + + unless column_exists?(:rules, :conditions) + t.json :conditions, null: false, default: {} + end + + unless column_exists?(:rules, :metadata) + t.json :metadata, default: {} + end + + unless column_exists?(:rules, :priority) + t.integer :priority + end + + unless column_exists?(:rules, :expires_at) + t.datetime :expires_at + end + + unless column_exists?(:rules, :enabled) + t.boolean :enabled, default: true, null: false + end + end + + # Add indexes for efficient sync queries + add_index :rules, [:updated_at, :id], if_not_exists: true, name: "idx_rules_sync" + add_index :rules, :enabled, if_not_exists: true + add_index :rules, :expires_at, 
if_not_exists: true + add_index :rules, :source, if_not_exists: true + add_index :rules, :rule_type, if_not_exists: true + add_index :rules, [:rule_type, :enabled], if_not_exists: true, name: "idx_rules_type_enabled" + end +end diff --git a/db/migrate/20251103093205_split_network_ranges_into_ipv4_and_ipv6.rb b/db/migrate/20251103093205_split_network_ranges_into_ipv4_and_ipv6.rb new file mode 100644 index 0000000..4c14770 --- /dev/null +++ b/db/migrate/20251103093205_split_network_ranges_into_ipv4_and_ipv6.rb @@ -0,0 +1,70 @@ +class SplitNetworkRangesIntoIpv4AndIpv6 < ActiveRecord::Migration[8.1] + def change + # Drop the old network_ranges table (no data to preserve) + drop_table :network_ranges, if_exists: true + + # Create optimized IPv4 ranges table + create_table :ipv4_ranges do |t| + # Range fields for fast lookups + t.integer :network_start, limit: 8, null: false + t.integer :network_end, limit: 8, null: false + t.integer :network_prefix, null: false + + # IP intelligence metadata + t.string :company + t.integer :asn + t.string :asn_org + t.boolean :is_datacenter, default: false + t.boolean :is_proxy, default: false + t.boolean :is_vpn, default: false + t.string :ip_api_country + t.string :geo2_country + t.text :abuser_scores + t.text :additional_data + t.timestamp :last_api_fetch + + t.timestamps + end + + # Optimized indexes for IPv4 + add_index :ipv4_ranges, [:network_start, :network_end, :network_prefix], + name: "idx_ipv4_range_lookup" + add_index :ipv4_ranges, :asn, name: "idx_ipv4_asn" + add_index :ipv4_ranges, :company, name: "idx_ipv4_company" + add_index :ipv4_ranges, :ip_api_country, name: "idx_ipv4_country" + add_index :ipv4_ranges, [:is_datacenter, :is_proxy, :is_vpn], + name: "idx_ipv4_flags" + + # Create optimized IPv6 ranges table + create_table :ipv6_ranges do |t| + # Range fields for fast lookups (binary for 128-bit addresses) + t.binary :network_start, limit: 16, null: false + t.binary :network_end, limit: 16, null: false + t.integer 
:network_prefix, null: false + + # IP intelligence metadata (same as IPv4) + t.string :company + t.integer :asn + t.string :asn_org + t.boolean :is_datacenter, default: false + t.boolean :is_proxy, default: false + t.boolean :is_vpn, default: false + t.string :ip_api_country + t.string :geo2_country + t.text :abuser_scores + t.text :additional_data + t.timestamp :last_api_fetch + + t.timestamps + end + + # Optimized indexes for IPv6 + add_index :ipv6_ranges, [:network_start, :network_end, :network_prefix], + name: "idx_ipv6_range_lookup" + add_index :ipv6_ranges, :asn, name: "idx_ipv6_asn" + add_index :ipv6_ranges, :company, name: "idx_ipv6_company" + add_index :ipv6_ranges, :ip_api_country, name: "idx_ipv6_country" + add_index :ipv6_ranges, [:is_datacenter, :is_proxy, :is_vpn], + name: "idx_ipv6_flags" + end +end diff --git a/db/migrate/20251103103521_create_geo_ip_databases.rb b/db/migrate/20251103103521_create_geo_ip_databases.rb new file mode 100644 index 0000000..724f72e --- /dev/null +++ b/db/migrate/20251103103521_create_geo_ip_databases.rb @@ -0,0 +1,16 @@ +class CreateGeoIpDatabases < ActiveRecord::Migration[8.1] + def change + create_table :geo_ip_databases do |t| + t.string :database_type + t.string :version + t.string :file_path + t.integer :file_size + t.string :checksum_md5 + t.datetime :downloaded_at + t.datetime :last_checked_at + t.boolean :is_active + + t.timestamps + end + end +end diff --git a/db/migrate/20251103105609_drop_geo_ip_databases_table.rb b/db/migrate/20251103105609_drop_geo_ip_databases_table.rb new file mode 100644 index 0000000..e1eff10 --- /dev/null +++ b/db/migrate/20251103105609_drop_geo_ip_databases_table.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +class DropGeoIpDatabasesTable < ActiveRecord::Migration[8.1] + def up + drop_table :geo_ip_databases + end + + def down + create_table :geo_ip_databases do |t| + t.string :database_type, null: false + t.string :version, null: false + t.string :file_path, null: false + 
t.integer :file_size, null: false + t.string :checksum_md5, null: false + t.datetime :downloaded_at, null: false + t.datetime :last_checked_at + t.boolean :is_active, default: true + t.timestamps + end + + add_index :geo_ip_databases, :is_active + add_index :geo_ip_databases, :database_type + add_index :geo_ip_databases, :file_path, unique: true + end +end diff --git a/db/migrate/20251103130430_change_request_method_to_integer_in_events.rb b/db/migrate/20251103130430_change_request_method_to_integer_in_events.rb new file mode 100644 index 0000000..2698078 --- /dev/null +++ b/db/migrate/20251103130430_change_request_method_to_integer_in_events.rb @@ -0,0 +1,74 @@ +class ChangeRequestMethodToIntegerInEvents < ActiveRecord::Migration[8.1] + def change + # Convert enum columns from string to integer for proper enum support + reversible do |dir| + dir.up do + # Map request_method string values to enum integers + execute <<-SQL + UPDATE events + SET request_method = CASE + WHEN LOWER(request_method) = 'get' THEN '0' + WHEN LOWER(request_method) = 'post' THEN '1' + WHEN LOWER(request_method) = 'put' THEN '2' + WHEN LOWER(request_method) = 'patch' THEN '3' + WHEN LOWER(request_method) = 'delete' THEN '4' + WHEN LOWER(request_method) = 'head' THEN '5' + WHEN LOWER(request_method) = 'options' THEN '6' + ELSE '0' -- Default to GET for unknown values + END + WHERE request_method IS NOT NULL; + SQL + + # Map waf_action string values to enum integers + execute <<-SQL + UPDATE events + SET waf_action = CASE + WHEN LOWER(waf_action) = 'allow' THEN '0' + WHEN LOWER(waf_action) IN ('deny', 'block') THEN '1' + WHEN LOWER(waf_action) = 'redirect' THEN '2' + WHEN LOWER(waf_action) = 'challenge' THEN '3' + ELSE '0' -- Default to allow for unknown values + END + WHERE waf_action IS NOT NULL; + SQL + + # Change column types to integer + change_column :events, :request_method, :integer + change_column :events, :waf_action, :integer + end + + dir.down do + # Convert back to string values + 
change_column :events, :request_method, :string + change_column :events, :waf_action, :string + + execute <<-SQL + UPDATE events + SET request_method = CASE request_method + WHEN 0 THEN 'get' + WHEN 1 THEN 'post' + WHEN 2 THEN 'put' + WHEN 3 THEN 'patch' + WHEN 4 THEN 'delete' + WHEN 5 THEN 'head' + WHEN 6 THEN 'options' + ELSE 'get' -- Default to GET for unknown values + END + WHERE request_method IS NOT NULL; + SQL + + execute <<-SQL + UPDATE events + SET waf_action = CASE waf_action + WHEN 0 THEN 'allow' + WHEN 1 THEN 'deny' + WHEN 2 THEN 'redirect' + WHEN 3 THEN 'challenge' + ELSE 'allow' -- Default to allow for unknown values + END + WHERE waf_action IS NOT NULL; + SQL + end + end + end +end diff --git a/db/schema.rb b/db/schema.rb index d4a847c..4a0a627 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[8.1].define(version: 2025_11_03_035249) do +ActiveRecord::Schema[8.1].define(version: 2025_11_03_130430) do create_table "events", force: :cascade do |t| t.string "agent_name" t.string "agent_version" @@ -24,7 +24,7 @@ ActiveRecord::Schema[8.1].define(version: 2025_11_03_035249) do t.json "payload" t.integer "project_id", null: false t.integer "request_host_id" - t.string "request_method" + t.integer "request_method" t.string "request_path" t.string "request_protocol" t.string "request_segment_ids" @@ -36,7 +36,7 @@ ActiveRecord::Schema[8.1].define(version: 2025_11_03_035249) do t.datetime "timestamp", null: false t.datetime "updated_at", null: false t.text "user_agent" - t.string "waf_action" + t.integer "waf_action" t.index ["event_id"], name: "index_events_on_event_id", unique: true t.index ["ip_address"], name: "index_events_on_ip_address" t.index ["project_id", "ip_address"], name: "index_events_on_project_id_and_ip_address" @@ -50,7 +50,7 @@ ActiveRecord::Schema[8.1].define(version: 2025_11_03_035249) do t.index ["waf_action"], name: 
"index_events_on_waf_action" end - create_table "network_ranges", force: :cascade do |t| + create_table "ipv4_ranges", force: :cascade do |t| t.text "abuser_scores" t.text "additional_data" t.integer "asn" @@ -58,21 +58,44 @@ ActiveRecord::Schema[8.1].define(version: 2025_11_03_035249) do t.string "company" t.datetime "created_at", null: false t.string "geo2_country" - t.binary "ip_address", null: false t.string "ip_api_country" - t.integer "ip_version", null: false t.boolean "is_datacenter", default: false t.boolean "is_proxy", default: false t.boolean "is_vpn", default: false t.datetime "last_api_fetch" + t.integer "network_end", limit: 8, null: false t.integer "network_prefix", null: false + t.integer "network_start", limit: 8, null: false t.datetime "updated_at", null: false - t.index ["asn"], name: "idx_network_ranges_asn" - t.index ["company"], name: "idx_network_ranges_company" - t.index ["ip_address", "network_prefix"], name: "idx_network_ranges_ip_range" - t.index ["ip_api_country"], name: "idx_network_ranges_country" - t.index ["ip_version"], name: "idx_network_ranges_version" - t.index ["is_datacenter", "is_proxy", "is_vpn"], name: "idx_network_ranges_flags" + t.index ["asn"], name: "idx_ipv4_asn" + t.index ["company"], name: "idx_ipv4_company" + t.index ["ip_api_country"], name: "idx_ipv4_country" + t.index ["is_datacenter", "is_proxy", "is_vpn"], name: "idx_ipv4_flags" + t.index ["network_start", "network_end", "network_prefix"], name: "idx_ipv4_range_lookup" + end + + create_table "ipv6_ranges", force: :cascade do |t| + t.text "abuser_scores" + t.text "additional_data" + t.integer "asn" + t.string "asn_org" + t.string "company" + t.datetime "created_at", null: false + t.string "geo2_country" + t.string "ip_api_country" + t.boolean "is_datacenter", default: false + t.boolean "is_proxy", default: false + t.boolean "is_vpn", default: false + t.datetime "last_api_fetch" + t.binary "network_end", limit: 16, null: false + t.integer "network_prefix", null: 
false + t.binary "network_start", limit: 16, null: false + t.datetime "updated_at", null: false + t.index ["asn"], name: "idx_ipv6_asn" + t.index ["company"], name: "idx_ipv6_company" + t.index ["ip_api_country"], name: "idx_ipv6_country" + t.index ["is_datacenter", "is_proxy", "is_vpn"], name: "idx_ipv6_flags" + t.index ["network_start", "network_end", "network_prefix"], name: "idx_ipv6_range_lookup" end create_table "path_segments", force: :cascade do |t| @@ -101,6 +124,13 @@ ActiveRecord::Schema[8.1].define(version: 2025_11_03_035249) do t.index ["slug"], name: "index_projects_on_slug", unique: true end + create_table "request_actions", force: :cascade do |t| + t.string "action", null: false + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["action"], name: "index_request_actions_on_action", unique: true + end + create_table "request_hosts", force: :cascade do |t| t.datetime "created_at", null: false t.datetime "first_seen_at", null: false @@ -148,14 +178,18 @@ ActiveRecord::Schema[8.1].define(version: 2025_11_03_035249) do t.datetime "expires_at" t.json "metadata" t.integer "priority" - t.integer "rule_set_id", null: false t.string "rule_type" + t.string "source", limit: 100 t.string "target" t.datetime "updated_at", null: false - t.index ["rule_set_id"], name: "index_rules_on_rule_set_id" + t.index ["enabled"], name: "index_rules_on_enabled" + t.index ["expires_at"], name: "index_rules_on_expires_at" + t.index ["rule_type", "enabled"], name: "idx_rules_type_enabled" + t.index ["rule_type"], name: "index_rules_on_rule_type" + t.index ["source"], name: "index_rules_on_source" + t.index ["updated_at", "id"], name: "idx_rules_sync" end add_foreign_key "events", "projects" add_foreign_key "events", "request_hosts" - add_foreign_key "rules", "rule_sets" end diff --git a/docs/maxmind.md b/docs/maxmind.md new file mode 100644 index 0000000..1496476 --- /dev/null +++ b/docs/maxmind.md @@ -0,0 +1,358 @@ +# MaxMind GeoIP Integration 
+ +This document describes the MaxMind GeoIP integration implemented in the Baffle Hub WAF analytics system. + +## Overview + +The Baffle Hub application uses MaxMind's free GeoLite2-Country database to provide geographic location information for IP addresses. The system automatically enriches WAF events with country codes and provides manual lookup capabilities for both IPv4 and IPv6 addresses. + +## Features + +- **On-demand lookup** - Country code lookup by IP address +- **Automatic enrichment** - Events are enriched with geo-location data during processing +- **Manual lookup capability** - Rake tasks and model methods for manual lookups +- **GeoLite2-Country database** - Uses MaxMind's free country-level database +- **Automatic updates** - Weekly background job updates the database +- **IPv4/IPv6 support** - Full protocol support for both IP versions +- **Performance optimized** - Database caching and efficient lookups +- **Graceful degradation** - Fallback handling when database is unavailable + +## Architecture + +### Core Components + +#### 1. GeoIpService +- Central service for all IP geolocation operations +- Handles database loading from file system +- Provides batch lookup capabilities +- Manages database updates from MaxMind CDN +- Uses MaxMind's built-in metadata for version information + +#### 2. UpdateGeoIpDatabaseJob +- Background job for automatic database updates +- Runs weekly to keep the database current +- Simple file-based validation and updates + +#### 3. Enhanced Models +- **Event Model** - Automatic geo-location enrichment for WAF events +- **IPv4Range/IPv6Range Models** - Manual lookup methods for IP ranges + +#### 4. 
File-System Management +- Database stored as single file: `db/geoip/GeoLite2-Country.mmdb` +- Version information queried directly from MaxMind database metadata +- No database tables needed - simplified approach + +## Installation & Setup + +### Dependencies +The integration uses the following gems: +- `maxmind-db` - Official MaxMind database reader (with built-in caching) +- `httparty` - HTTP client for database downloads + +### Database Storage +- Location: `db/geoip/GeoLite2-Country.mmdb` +- Automatic creation of storage directory +- File validation and integrity checking +- Version information queried directly from database metadata +- No additional caching needed - MaxMind DB has its own internal caching + +### Initial Setup +```bash +# Install dependencies +bundle install + +# Download the GeoIP database +rails geoip:update + +# Verify installation +rails geoip:status +``` + +## Configuration + +The system is configurable via environment variables or application configuration: + +| Variable | Default | Description | +|----------|---------|-------------| +| `MAXMIND_DATABASE_URL` | MaxMind CDN URL | Database download URL | +| `MAXMIND_AUTO_UPDATE` | `true` | Enable automatic weekly updates | +| `MAXMIND_UPDATE_INTERVAL_DAYS` | `7` | Days between update checks | +| `MAXMIND_MAX_AGE_DAYS` | `30` | Maximum database age before forced update | +| Note: MaxMind DB has built-in caching, no additional caching needed | +| `MAXMIND_FALLBACK_COUNTRY` | `nil` | Fallback country when lookup fails | +| `MAXMIND_ENABLE_FALLBACK` | `false` | Enable fallback country usage | + +### Example Configuration +```bash +# config/application.rb or .env file +MAXMIND_AUTO_UPDATE=true +MAXMIND_UPDATE_INTERVAL_DAYS=7 +MAXMIND_MAX_AGE_DAYS=30 +MAXMIND_FALLBACK_COUNTRY=US +MAXMIND_ENABLE_FALLBACK=true +# Note: No caching configuration needed - MaxMind has built-in caching +``` + +## Usage + +### Rake Tasks + +#### Database Management +```bash +# Download/update the GeoIP database +rails 
geoip:update + +# Check database status and configuration +rails geoip:status + +# Test the implementation with sample IPs +rails geoip:test + +# Manual lookup for a specific IP +rails geoip:lookup[8.8.8.8] +rails geoip:lookup[2001:4860:4860::8888] +``` + +#### Data Management +```bash +# Enrich existing events missing country codes +rails geoip:enrich_missing + +# Clean up old inactive database records +rails geoip:cleanup +``` + +### Ruby API + +#### Service-Level Lookups +```ruby +# Direct country lookup +country = GeoIpService.lookup_country('8.8.8.8') +# => "US" + +# Batch lookup +countries = GeoIpService.new.lookup_countries(['8.8.8.8', '1.1.1.1']) +# => { "8.8.8.8" => "US", "1.1.1.1" => nil } + +# Check database availability +service = GeoIpService.new +service.database_available? # => true/false +service.database_info # => Database metadata +``` + +#### Event Model Integration +```ruby +# Automatic enrichment during event processing +event = Event.find(123) +event.enrich_geo_location! # Updates event with country code +event.lookup_country # => "US" (with fallback to service) +event.has_geo_data? # => true/false +event.geo_location # => { country_code: "US", city: nil, ... } + +# Batch enrichment of existing events +updated_count = Event.enrich_geo_location_batch +puts "Enriched #{updated_count} events with geo data" +``` + +#### IP Range Model Integration +```ruby +# IPv4 Range lookups +range = Ipv4Range.find(123) +range.geo_lookup_country! # Updates range with country code +range.geo_lookup_country # => "US" (without updating) +range.has_country_info? 
# => true/false +range.primary_country # => "US" (best available country) + +# Class methods +country = Ipv4Range.lookup_country_by_ip('8.8.8.8') +updated_count = Ipv4Range.enrich_missing_geo_data(limit: 1000) + +# IPv6 Range lookups (same interface) +country = Ipv6Range.lookup_country_by_ip('2001:4860:4860::8888') +updated_count = Ipv6Range.enrich_missing_geo_data(limit: 1000) +``` + +### Background Processing + +#### Automatic Updates +The system automatically schedules database updates: +```ruby +# Manually trigger an update (usually scheduled automatically) +UpdateGeoIpDatabaseJob.perform_later + +# Force update regardless of age +UpdateGeoIpDatabaseJob.perform_later(force_update: true) +``` + +#### Event Processing Integration +Geo-location enrichment is automatically included in WAF event processing: +```ruby +# This is called automatically in ProcessWafEventJob +event = Event.create_from_waf_payload!(event_id, payload, project) +event.enrich_geo_location! if event.ip_address.present? && event.country_code.blank? 
+```
+
+## Database Information
+
+### GeoLite2-Country Database
+- **Source**: MaxMind GeoLite2-Country (free version)
+- **Update Frequency**: Weekly (Tuesdays)
+- **Size**: ~9.5 MB
+- **Coverage**: Global IP-to-country mapping
+- **Format**: MaxMind DB (.mmdb)
+
+### Database Fields
+- `country.iso_code` - Two-letter ISO country code
+- Supports both IPv4 and IPv6 addresses
+- Includes anonymous/proxy detection metadata
+
+## Performance Considerations
+
+### Lookup Performance
+- MaxMind DB has built-in internal caching optimized for lookups
+- No additional caching layer needed
+- Typical lookup time: <1ms
+- Database size optimized for fast lookups
+- Efficient range queries for IP networks
+
+### Memory Usage
+- Database loaded into memory for fast access
+- Approximate memory usage: 15-20 MB for the country database
+- Automatic cleanup of old database files
+
+## Error Handling
+
+### Graceful Degradation
+- Service returns `nil` when database unavailable
+- Logging at appropriate levels for different error types
+- Event processing continues even if geo-location fails
+
+### Common Error Scenarios
+1. **Database Missing** - Automatic download triggered
+2. **Database Corrupted** - Automatic re-download attempted
+3. **Network Issues** - Graceful fallback with error logging
+4. 
**Invalid IP Address** - Returns `nil` with warning log
+
+## Troubleshooting
+
+### Check System Status
+```bash
+# Verify database status
+rails geoip:status
+
+# Test with known IPs
+rails geoip:test
+
+# Check logs for errors
+tail -f log/production.log | grep GeoIP
+```
+
+### Common Issues
+
+#### Database Not Available
+```bash
+# Force database update
+rails geoip:update
+
+# Check file permissions
+ls -la db/geoip/
+```
+
+#### Lookup Failures
+```bash
+# Test specific IPs
+rails geoip:lookup[8.8.8.8]
+
+# Check database validity
+rails runner "puts GeoIpService.new.database_available?"
+```
+
+#### Performance Issues
+- Verify the database is current (`rails geoip:status`) - caching is built into MaxMind DB and needs no tuning
+- Check memory usage on deployment server
+- Monitor lookup times with application metrics
+
+## Monitoring & Maintenance
+
+### Health Checks
+```ruby
+# Rails console health check
+service = GeoIpService.new
+puts "Database available: #{service.database_available?}"
+puts "Database info: #{service.database_info}"
+```
+
+### Scheduled Maintenance
+- Database automatically updated weekly
+- Old database files cleaned up after 7 days
+- No manual maintenance required
+
+### Monitoring Metrics
+Consider monitoring:
+- Database update success/failure rates
+- Lookup performance (response times)
+- Database age and freshness
+- Memory usage of the loaded database
+
+## Security & Privacy
+
+### Data Privacy
+- No personal data stored in the GeoIP database
+- Only country-level information provided
+- No tracking or logging of IP lookups by default
+
+### Network Security
+- Database downloaded from official MaxMind CDN
+- File integrity validated with MD5 checksums
+- Secure temporary file handling during updates
+
+## API Reference
+
+### GeoIpService
+
+#### Class Methods
+- `lookup_country(ip_address)` - Direct lookup
+- `update_database!` - Force database update
+
+#### Instance Methods
+- `lookup_country(ip_address)` - Country lookup
+- `lookup_countries(ip_addresses)` - Batch lookup
+- 
`database_available?` - Check database status +- `database_info` - Get database metadata +- `update_from_remote!` - Download new database + +### Model Methods + +#### Event Model +- `enrich_geo_location!` - Update with country code +- `lookup_country` - Get country code (with fallback) +- `has_geo_data?` - Check if geo data exists +- `geo_location` - Get full geo location hash + +#### IPv4Range/IPv6Range Models +- `geo_lookup_country!` - Update range with country code +- `geo_lookup_country` - Get country code (without update) +- `has_country_info?` - Check for existing country data +- `primary_country` - Get best available country code +- `lookup_country_by_ip(ip)` - Class method for IP lookup +- `enrich_missing_geo_data(limit:)` - Class method for batch enrichment + +## Support & Resources + +### MaxMind Documentation +- [MaxMind Developer Site](https://dev.maxmind.com/) +- [GeoLite2 Databases](https://dev.maxmind.com/geoip/geolite2-free-geolocation-data) +- [Database Accuracy](https://dev.maxmind.com/geoip/geolite2-free-geolocation-data#accuracy) + +### Ruby Libraries +- [maxmind-db gem](https://github.com/maxmind/MaxMind-DB-Reader-ruby) +- [httparty gem](https://github.com/jnunemaker/httparty) + +### Troubleshooting Resources +- Application logs: `log/production.log` +- Rails console for manual testing +- Database status via `rails geoip:status` \ No newline at end of file diff --git a/docs/rule-architecture.md b/docs/rule-architecture.md new file mode 100644 index 0000000..641fff3 --- /dev/null +++ b/docs/rule-architecture.md @@ -0,0 +1,625 @@ +# Baffle Hub - Rule Architecture + +## Overview + +Baffle Hub uses a distributed rule system where the Hub generates and manages rules, and Agents download and enforce them locally using optimized SQLite queries. This architecture provides sub-millisecond rule evaluation while maintaining centralized intelligence and control. + +## Core Principles + +1. 
**Hub-side Intelligence**: Pattern detection and rule generation happens on the Hub
+2. **Agent-side Enforcement**: Rule evaluation happens locally on Agents for speed
+3. **Incremental Sync**: Agents poll for rule updates using timestamp-based cursors
+4. **Dynamic Backpressure**: Hub controls event sampling based on load
+5. **Temporal Rules**: Rules can expire automatically (e.g., 24-hour bans)
+6. **Soft Deletes**: Rules are disabled, not deleted, for proper sync and audit trail
+
+## Rule Types
+
+### 1. Network Rules (`network_v4`, `network_v6`)
+
+Block or allow traffic based on IP address or CIDR ranges.
+
+**Use Cases**:
+- Block scanner IPs (temporary or permanent)
+- Block datacenter/VPN/proxy ranges
+- Allow trusted IP ranges
+- Geographic blocking via IP ranges
+
+**Evaluation**:
+- **Most specific CIDR wins** (longest prefix, i.e. the smallest network)
+- `/32` beats `/24` beats `/16` beats `/8`
+- Agent uses optimized range queries on `ipv4_ranges`/`ipv6_ranges` tables
+
+**Example**:
+```json
+{
+  "id": 12341,
+  "rule_type": "network_v4",
+  "action": "deny",
+  "conditions": { "cidr": "185.220.100.0/22" },
+  "priority": 22,
+  "expires_at": "2024-11-04T12:00:00Z",
+  "enabled": true,
+  "source": "auto:scanner_detected",
+  "metadata": {
+    "reason": "Tor exit node hitting /.env",
+    "auto_generated": true
+  }
+}
+```
+
+### 2. Rate Limit Rules (`rate_limit`)
+
+Control request rate per IP or per CIDR range.
+
+**Scopes** (Phase 1):
+- **Global per-IP**: Limit requests per IP across all paths
+- **Per-CIDR**: Different limits for different network ranges
+
+**Scopes** (Phase 2+):
+- **Per-path per-IP**: Different limits for `/api/*`, `/login`, etc. 
+ +**Evaluation**: +- Agent maintains in-memory counters per IP +- Finds most specific CIDR rule for the IP +- Applies that rule's rate limit configuration +- Optional: Persist counters to SQLite for restart resilience + +**Example (Phase 1)**: +```json +{ + "id": 12342, + "rule_type": "rate_limit", + "action": "rate_limit", + "conditions": { + "cidr": "0.0.0.0/0", + "scope": "global" + }, + "priority": 0, + "enabled": true, + "source": "manual", + "metadata": { + "limit": 100, + "window": 60, + "per_ip": true + } +} +``` + +**Example (Phase 2+)**: +```json +{ + "id": 12343, + "rule_type": "rate_limit", + "action": "rate_limit", + "conditions": { + "cidr": "0.0.0.0/0", + "scope": "per_path", + "path_pattern": "/api/login" + }, + "metadata": { + "limit": 5, + "window": 60, + "per_ip": true + } +} +``` + +### 3. Path Pattern Rules (`path_pattern`) + +Detect suspicious path access patterns (mainly for Hub analytics). + +**Use Cases**: +- Detect scanners hitting `/.env`, `/.git`, `/wp-admin` +- Identify bots with suspicious path traversal +- Trigger automatic IP bans when patterns match + +**Evaluation**: +- Agent does lightweight pattern matching +- When matched, sends event to Hub with `matched_pattern: true` +- Hub analyzes and creates IP block rules if needed +- Agent picks up new IP block rule in next sync (~10s) + +**Example**: +```json +{ + "id": 12344, + "rule_type": "path_pattern", + "action": "log", + "conditions": { + "patterns": ["/.env", "/.git/*", "/wp-admin/*", "/.aws/*", "/phpMyAdmin/*"] + }, + "enabled": true, + "source": "default:scanner_detection", + "metadata": { + "auto_ban_ip": true, + "ban_duration_hours": 24, + "description": "Common scanner paths" + } +} +``` + +## Rule Actions + +| Action | Description | HTTP Response | +|--------|-------------|---------------| +| `allow` | Pass request through | Continue to app | +| `deny` | Block request | 403 Forbidden | +| `rate_limit` | Enforce rate limit | 429 Too Many Requests | +| `redirect` | Redirect 
to URL | 301/302 + Location header |
+| `challenge` | Show CAPTCHA (Phase 2+) | 403 with challenge |
+| `log` | Log only, don't block | Continue to app |
+
+## Rule Priority & Specificity
+
+### Network Rules
+- **Priority is determined by CIDR prefix length**
+- Longer prefix (more specific, smaller network) = higher priority
+- `/32` (single IP) beats `/24` (256 IPs) beats `/8` (16M IPs)
+- Example: Block `10.0.0.0/8` but allow `10.0.1.0/24`
+  - Request from `10.0.1.5` → matches `/24` → allowed
+  - Request from `10.0.2.5` → matches `/8` only → blocked
+
+### Rate Limit Rules
+- Most specific CIDR match wins
+- Per-path rules take precedence over global (Phase 2+)
+
+### Path Pattern Rules
+- All patterns are evaluated (not exclusive)
+- Used for detection, not blocking
+- Multiple pattern matches = stronger signal for ban
+
+## Rule Synchronization
+
+### Timestamp-Based Cursor
+
+Agents use `updated_at` timestamps as sync cursors to handle rule updates and deletions.
+
+**Why `updated_at` instead of `id`?**
+- Handles rule updates (e.g., disabling a rule updates `updated_at`)
+- Handles rule deletions via `enabled=false` flag
+- Simple for agents: "give me everything that changed since X"
+
+**Agent Sync Flow**:
+```
+1. Agent starts: last_sync = nil
+2. GET /api/:key/rules → Full sync, store latest updated_at
+3. Every 10s or 1000 events: GET /api/:key/rules?since=