Many updates

This commit is contained in:
Dan Milne
2025-11-13 14:42:43 +11:00
parent 5e5198f113
commit df94ac9720
41 changed files with 4760 additions and 516 deletions

View File

@@ -4,6 +4,10 @@ class Event < ApplicationRecord
# Normalized association for hosts (most valuable compression)
belongs_to :request_host, optional: true
# WAF rule associations
belongs_to :rule, optional: true
has_one :waf_policy, through: :rule
# Enums for fixed value sets
enum :waf_action, {
allow: 0, # allow/pass
@@ -29,7 +33,7 @@ class Event < ApplicationRecord
# This provides direct array access and efficient indexing
attribute :tags, :json, default: -> { [] }
validates :event_id, presence: true, uniqueness: true
validates :request_id, presence: true, uniqueness: true
validates :timestamp, presence: true
scope :recent, -> { order(timestamp: :desc) }
@@ -55,32 +59,42 @@ class Event < ApplicationRecord
where("tags @> ARRAY[?]", tag_array)
}
# Network-based filtering scopes
# Network-based filtering scopes - now using denormalized columns
scope :by_company, ->(company) {
joins("JOIN network_ranges ON events.ip_address <<= network_ranges.network")
.where("network_ranges.company ILIKE ?", "%#{company}%")
where("company ILIKE ?", "%#{company}%")
}
scope :by_country, ->(country) {
where(country: country)
}
scope :by_network_type, ->(type) {
joins("JOIN network_ranges ON events.ip_address <<= network_ranges.network")
.case(type)
.when("datacenter") { where("network_ranges.is_datacenter = ?", true) }
.when("vpn") { where("network_ranges.is_vpn = ?", true) }
.when("proxy") { where("network_ranges.is_proxy = ?", true) }
.when("standard") { where("network_ranges.is_datacenter = ? AND network_ranges.is_vpn = ? AND network_ranges.is_proxy = ?", false, false, false) }
.else { none }
case type.to_s.downcase
when "datacenter"
where(is_datacenter: true)
when "vpn"
where(is_vpn: true)
when "proxy"
where(is_proxy: true)
when "standard"
where(is_datacenter: false, is_vpn: false, is_proxy: false)
else
none
end
}
scope :by_asn, ->(asn) {
joins("JOIN network_ranges ON events.ip_address <<= network_ranges.network")
.where("network_ranges.asn = ?", asn.to_i)
where(asn: asn.to_i)
}
scope :by_network_cidr, ->(cidr) {
joins("JOIN network_ranges ON events.ip_address <<= network_ranges.network")
.where("network_ranges.network = ?", cidr)
# This still requires a join since we need to match CIDR
joins(:network_range).where("network_ranges.network = ?", cidr)
}
# Add association for the optional network_range_id
belongs_to :network_range, optional: true
# Path prefix matching using range queries (uses B-tree index efficiently)
scope :with_path_prefix, ->(prefix_segment_ids) {
return none if prefix_segment_ids.blank?
@@ -130,13 +144,39 @@ class Event < ApplicationRecord
# Normalize event fields after extraction
after_validation :normalize_event_fields, if: :should_normalize?
def self.create_from_waf_payload!(event_id, payload)
# Populate network intelligence from IP address
before_save :populate_network_intelligence, if: :should_populate_network_intelligence?
# Re-save every event whose country is still NULL so that the before_save
# callbacks (including populate_network_intelligence, when its condition
# holds) run for it. Progress is written to stdout after each batch.
#
# Failed saves are not detected: `save` is used, so a record that fails to
# persist is still counted as processed.
#
# @param batch_size [Integer] number of events loaded per batch
# @return [Integer] number of events visited (0 when nothing is pending)
def self.backfill_network_intelligence!(batch_size: 10_000)
  pending = where(country: nil)
  total = pending.count
  return 0 if total.zero?

  puts "Backfilling network intelligence for #{total} events..."

  done = 0
  pending.find_in_batches(batch_size: batch_size) do |events|
    events.each { |event| event.save } # saving fires the before_save callbacks
    done += events.size
    percent = (done.to_f / total * 100).round(1)
    puts " Processed #{done}/#{total} (#{percent}%)"
  end

  done
end
# Recompute the network intelligence fields for this single event and
# persist the result.
#
# Calls populate_network_intelligence directly (regardless of the callback's
# `if:` condition) and then saves with `save!`, so a failed save raises.
#
# @return [Object] whatever `save!` returns
def backfill_network_intelligence!
  populate_network_intelligence

  save!
end
def self.create_from_waf_payload!(request_id, payload)
# Normalize headers in payload during import phase
normalized_payload = normalize_payload_headers(payload)
# Create the WAF request event
create!(
event_id: event_id,
request_id: request_id,
timestamp: parse_timestamp(normalized_payload["timestamp"]),
payload: normalized_payload,
@@ -150,7 +190,8 @@ class Event < ApplicationRecord
response_status: normalized_payload.dig("response", "status_code"),
response_time_ms: normalized_payload.dig("response", "duration_ms"),
waf_action: normalize_action(normalized_payload["waf_action"]), # Normalize incoming action values
rule_matched: normalized_payload["rule_matched"],
# Support both new (rule_id) and old (rule_matched) field names during cutover
rule_id: normalized_payload["rule_id"] || normalized_payload["rule_matched"],
blocked_reason: normalized_payload["blocked_reason"],
# Server/Environment info
@@ -283,7 +324,7 @@ class Event < ApplicationRecord
end
def rule_matched?
rule_matched.present?
rule_id.present?
end
# New path methods for normalization
@@ -343,40 +384,39 @@ class Event < ApplicationRecord
end
def most_specific_range
matching_network_ranges.first
# Use the cached network_range_id if available (much faster)
return NetworkRange.find_by(id: network_range_id) if network_range_id.present?
# Fallback to expensive lookup
matching_network_ranges.first&.dig(:range)
end
def broadest_range
matching_network_ranges.last
matching_network_ranges.last&.dig(:range)
end
def network_intelligence
most_specific_range&.dig(:intelligence) || {}
# Use denormalized fields instead of expensive lookup
{
country: country,
company: company,
asn: asn,
asn_org: asn_org,
is_datacenter: is_datacenter,
is_vpn: is_vpn,
is_proxy: is_proxy
}
end
def company
network_intelligence[:company]
end
def asn
network_intelligence[:asn]
end
def asn_org
network_intelligence[:asn_org]
end
def is_datacenter?
network_intelligence[:is_datacenter] || false
end
def is_proxy?
network_intelligence[:is_proxy] || false
end
def is_vpn?
network_intelligence[:is_vpn] || false
end
# Denormalized attribute accessors - these now use the columns directly
# No need to override - Rails provides these automatically:
# - country (column)
# - company (column)
# - asn (column)
# - asn_org (column)
# - is_datacenter (column)
# - is_vpn (column)
# - is_proxy (column)
# IP validation
def valid_ipv4?
@@ -480,7 +520,8 @@ class Event < ApplicationRecord
self.request_url = request_data["url"]
self.response_status = response_data["status_code"]
self.response_time_ms = response_data["duration_ms"]
self.rule_matched = payload["rule_matched"]
# Support both new (rule_id) and old (rule_matched) field names during cutover
self.rule_id = payload["rule_id"] || payload["rule_matched"]
self.blocked_reason = payload["blocked_reason"]
# Store original values for normalization only if they don't exist yet

View File

@@ -116,7 +116,7 @@ class NetworkRange < ApplicationRecord
# Parent/child relationships
def parent_ranges
NetworkRange.where("network << ?::inet AND masklen(network) < ?", network.to_s, prefix_length)
NetworkRange.where("?::inet << network AND masklen(network) < ?", network.to_s, prefix_length)
.order("masklen(network) DESC")
end
@@ -142,6 +142,59 @@ class NetworkRange < ApplicationRecord
.first
end
# Whether IPAPI data exists for this network, either stored on the range
# itself or on any range returned by parent_ranges.
#
# The parent lookup is only performed when this range has no IPAPI data of
# its own.
#
# @return [Boolean]
def has_ipapi_data_available?
  if has_network_data_from?(:ipapi)
    true
  else
    parent_ranges.any? { |ancestor| ancestor.has_network_data_from?(:ipapi) }
  end
end
# Generic API fetching status management
# Whether a fetch for +source+ was marked as started within the last five
# minutes. Status entries live under network_data['fetching_status'], keyed
# by the source name as a string; an entry older than five minutes is
# treated as no longer in progress.
#
# @param source [String, Symbol] API source name (e.g. :ipapi)
# @return [Boolean, nil] nil when there is no entry (or no 'started_at')
#   for the source, otherwise true/false
def is_fetching_api_data?(source)
  statuses = network_data&.dig('fetching_status') || {}

  entry = statuses[source.to_s]
  return entry unless entry

  began = entry['started_at']
  return began unless began

  # 'started_at' is stored as a float epoch timestamp
  began > 5.minutes.ago.to_f
end
# Record that a fetch for +source+ has just started and persist the record.
#
# Writes network_data['fetching_status'][source] with the current time (float
# epoch seconds) and a random 16-hex-character job id, replacing any previous
# entry for the same source. Raises if `save!` fails.
#
# @param source [String, Symbol] API source name
# @return [Object] whatever `save!` returns
def mark_as_fetching_api_data!(source)
  self.network_data ||= {}
  statuses = (network_data['fetching_status'] ||= {})

  statuses[source.to_s] = {
    'started_at' => Time.current.to_f,
    'job_id' => SecureRandom.hex(8)
  }

  save!
end
# Remove the in-progress marker for +source+ and persist the record.
#
# Does nothing (and does not save) when no marker exists for the source.
# When the last marker is removed, the now-empty 'fetching_status' hash is
# dropped from network_data as well.
#
# @param source [String, Symbol] API source name
# @return [Object, nil] the result of `save!`, or nil when nothing was cleared
def clear_fetching_status!(source)
  key = source.to_s
  statuses = network_data&.dig('fetching_status')
  return unless statuses&.dig(key)

  statuses.delete(key)
  # Clean up empty fetching_status hash
  network_data.delete('fetching_status') if statuses.empty?

  save!
end
# Whether API data for +source+ should be fetched: true only when this range
# has no stored data from that source and no fetch for it is currently marked
# as in progress.
#
# The source is passed to has_network_data_from? as an argument. Embedding it
# in the method name handed to `send`/`respond_to?` names a method that does
# not exist, so the "already have data" check would never run.
#
# @param source [String, Symbol] API source name (e.g. :ipapi)
# @return [Boolean]
def should_fetch_api_data?(source)
  return false if has_network_data_from?(source)
  return false if is_fetching_api_data?(source)

  true
end
# Whether an IPAPI fetch should be started for this network.
#
# A fetch is skipped when any of the following holds, checked in this order:
#   1. IPAPI data is already available (has_ipapi_data_available?)
#   2. this range is currently fetching IPAPI data
#   3. any range returned by parent_ranges is currently fetching IPAPI data
#
# @return [Boolean]
def should_fetch_ipapi_data?
  blocked = has_ipapi_data_available? ||
            is_fetching_api_data?(:ipapi) ||
            parent_ranges.any? { |ancestor| ancestor.is_fetching_api_data?(:ipapi) }

  !blocked
end
def inherited_intelligence
return own_intelligence if has_intelligence?
@@ -168,6 +221,12 @@ class NetworkRange < ApplicationRecord
}
end
# Count how many of this range's events carry each user agent.
#
# Every event returned by `events` is loaded and its user_agent read, so
# this can be expensive for large ranges. A 5-minute Rails.cache wrapper
# keyed on "#{to_s}:agent_tally" was sketched here but is not enabled.
#
# @return [Hash] user agent => number of events
def agent_tally
  agents = events.map { |event| event.user_agent }
  agents.tally
end
# Geographic lookup
def geo_lookup_country!
return if country.present?
@@ -189,6 +248,12 @@ class NetworkRange < ApplicationRecord
where("network && ?", range_cidr)
end
# Look up a range by a URL-friendly CIDR string.
#
# Underscores are treated as the prefix separator ("10.0.0.0_8" becomes
# "10.0.0.0/8"), and a value with no prefix length is looked up as a /24.
#
# @param cidr [String] CIDR using "/" or "_" as separator, or a bare address
# @return [NetworkRange, nil] the result of find_by(network: ...)
def self.findd(cidr)
  normalized = cidr.tr("_", "/")
  normalized += "/24" unless normalized.include?("/")

  find_by(network: normalized)
end
def self.find_or_create_by_cidr(cidr, user: nil, source: nil, reason: nil)
find_or_create_by(network: cidr) do |range|
range.user = user
@@ -246,6 +311,63 @@ class NetworkRange < ApplicationRecord
network_data&.key?(source.to_s) && network_data[source.to_s].present?
end
# IPAPI tracking at /24 granularity
# Find or create the tracking network that contains +ip_address+: the
# enclosing /24 for IPv4 addresses, the enclosing /64 for IPv6.
#
# A newly created range is tagged with source 'auto_generated' and
# creation_reason 'IPAPI tracking network'; an existing range is returned
# unchanged. An unparseable address raises from IPAddr.new.
#
# @param ip_address [#to_s] the address to locate
# @return [NetworkRange]
def self.find_or_create_tracking_network_for_ip(ip_address)
  address = IPAddr.new(ip_address.to_s)
  prefix = address.ipv4? ? 24 : 64
  tracking_cidr = "#{address.mask(prefix)}/#{prefix}"

  find_or_create_by(network: tracking_cidr) do |range|
    range.source = 'auto_generated'
    range.creation_reason = 'IPAPI tracking network'
  end
end
# Decide whether IPAPI should be queried for +ip_address+, using the
# address's tracking network (see find_or_create_tracking_network_for_ip) as
# the unit of bookkeeping. Note that the tracking network is created if it
# does not exist yet.
#
# Returns true when:
#   * the tracking network has never been queried, or
#   * IPAPI previously returned a CIDR that does not cover this address
#     (e.g. a /25 inside the /24), or
#   * the last query is more than a year old, or
#   * anything raises while checking (logged; defaults to fetching).
#
# An unparseable stored CIDR is logged and otherwise ignored.
#
# @param ip_address [#to_s]
# @return [Boolean]
def self.should_fetch_ipapi_for_ip?(ip_address)
  data = find_or_create_tracking_network_for_ip(ip_address).network_data

  last_queried = data&.dig('ipapi_queried_at')
  return true if last_queried.nil?

  covering_cidr = data&.dig('ipapi_returned_cidr')
  if covering_cidr.present?
    begin
      covered = IPAddr.new(covering_cidr).include?(IPAddr.new(ip_address.to_s))
      # If the IP is NOT covered by what IPAPI returned, fetch again
      return true unless covered
    rescue IPAddr::InvalidAddressError
      Rails.logger.warn "Invalid CIDR stored in ipapi_returned_cidr: #{covering_cidr}"
    end
  end

  # Re-query after 1 year
  Time.at(last_queried) < 1.year.ago
rescue => e
  Rails.logger.error "Error checking IPAPI fetch status for #{ip_address}: #{e.message}"
  true # Default to fetching on error
end
# Record that IPAPI has been queried for this tracking network and persist
# the record. Stores the current time (integer epoch seconds) under
# 'ipapi_queried_at' and the CIDR IPAPI answered with under
# 'ipapi_returned_cidr'; other network_data keys are left untouched.
#
# @param returned_cidr [String] The CIDR that IPAPI actually returned (may be more specific than /24)
# @return [Object] whatever `save!` returns
def mark_ipapi_queried!(returned_cidr)
  self.network_data ||= {}

  network_data.update(
    'ipapi_queried_at' => Time.current.to_i,
    'ipapi_returned_cidr' => returned_cidr
  )

  save!
end
# String representations
def to_s
cidr
@@ -261,10 +383,12 @@ class NetworkRange < ApplicationRecord
self[:events_count] || 0
end
# Events whose ip_address falls inside this range. Uses the `<<=` operator
# against this range's CIDR, so the range itself and everything nested in it
# match.
#
# @return [ActiveRecord::Relation] unordered, unlimited Event relation
def events
  containment = "ip_address <<= ?"
  Event.where(containment, cidr)
end
def recent_events(limit: 100)
Event.where(ip_address: child_ranges.pluck(:network_address) + [network_address])
.recent
.limit(limit)
events.recent.limit(limit)
end
def blocking_rules

View File

@@ -5,7 +5,11 @@
# Rules define actions to take for matching traffic conditions.
# Network rules are associated with NetworkRange objects for rich context.
class Rule < ApplicationRecord
# Rule types and actions
# Rule enums
enum :waf_action, { allow: 0, deny: 1, rate_limit: 2, redirect: 3, log: 4, challenge: 5 }, scopes: false, prefix: true
enum :waf_rule_type, { network: 0, rate_limit: 1, path_pattern: 2 }, scopes: false, prefix: true
# Legacy string constants for backward compatibility
RULE_TYPES = %w[network rate_limit path_pattern].freeze
ACTIONS = %w[allow deny rate_limit redirect log challenge].freeze
SOURCES = %w[manual auto:scanner_detected auto:rate_limit_exceeded auto:bot_detected imported default manual:surgical_block manual:surgical_exception policy].freeze
@@ -14,14 +18,42 @@ class Rule < ApplicationRecord
belongs_to :user
belongs_to :network_range, optional: true
belongs_to :waf_policy, optional: true
has_many :events, dependent: :nullify
# Backward compatibility accessors for transition period
# Legacy reader: `action` now reports the waf_action enum value.
def action
  waf_action
end

# Legacy writer: assigns the waf_action enum and mirrors it into the legacy
# `action` column.
#
# The legacy column receives the value as read back from waf_action rather
# than the raw argument, so the two columns agree even when the caller passes
# a symbol or the enum's integer. This matches what sync_legacy_columns
# writes before save.
#
# @param value [String, Symbol, Integer, nil] any value waf_action= accepts
def action=(value)
  self.waf_action = value
  self[:action] = waf_action # Also set the legacy column
end
# Legacy reader: `rule_type` now reports the waf_rule_type enum value.
def rule_type
  waf_rule_type
end

# Legacy writer: assigns the waf_rule_type enum and mirrors it into the
# legacy `rule_type` column.
#
# The legacy column receives the value as read back from waf_rule_type rather
# than the raw argument, so the two columns agree even when the caller passes
# a symbol or the enum's integer. This matches what sync_legacy_columns
# writes before save.
#
# @param value [String, Symbol, Integer, nil] any value waf_rule_type= accepts
def rule_type=(value)
  self.waf_rule_type = value
  self[:rule_type] = waf_rule_type # Also set the legacy column
end
# Validations
validates :rule_type, presence: true, inclusion: { in: RULE_TYPES }
validates :action, presence: true, inclusion: { in: ACTIONS }
validates :waf_rule_type, presence: true, inclusion: { in: waf_rule_types.keys }
validates :waf_action, presence: true, inclusion: { in: waf_actions.keys }
validates :conditions, presence: true, unless: :network_rule?
validates :enabled, inclusion: { in: [true, false] }
validates :source, inclusion: { in: SOURCES }
# Legacy enum definitions (disabled to prevent conflicts)
# enum :action, { allow: "allow", deny: "deny", rate_limit: "rate_limit", redirect: "redirect", log: "log", challenge: "challenge" }, scopes: false
# enum :rule_type, { network: "network", rate_limit: "rate_limit", path_pattern: "path_pattern" }, scopes: false
# Legacy validations for backward compatibility during transition
# validates :rule_type, presence: true, inclusion: { in: RULE_TYPES }, allow_nil: true
# validates :action, presence: true, inclusion: { in: ACTIONS }, allow_nil: true
# Custom validations
validate :validate_conditions_by_type
validate :validate_metadata_by_action
@@ -33,16 +65,22 @@ class Rule < ApplicationRecord
scope :disabled, -> { where(enabled: false) }
scope :active, -> { enabled.where("expires_at IS NULL OR expires_at > ?", Time.current) }
scope :expired, -> { where("expires_at IS NOT NULL AND expires_at <= ?", Time.current) }
scope :by_type, ->(type) { where(rule_type: type) }
scope :network_rules, -> { where(rule_type: "network") }
scope :rate_limit_rules, -> { where(rule_type: "rate_limit") }
scope :path_pattern_rules, -> { where(rule_type: "path_pattern") }
scope :by_type, ->(type) { where(waf_rule_type: type) }
scope :network_rules, -> { network }
scope :rate_limit_rules, -> { rate_limit }
scope :path_pattern_rules, -> { path_pattern }
scope :by_source, ->(source) { where(source: source) }
scope :surgical_blocks, -> { where(source: "manual:surgical_block") }
scope :surgical_exceptions, -> { where(source: "manual:surgical_exception") }
scope :policy_generated, -> { where(source: "policy") }
scope :from_waf_policy, ->(waf_policy) { where(waf_policy: waf_policy) }
# Legacy scopes for backward compatibility
scope :by_type_legacy, ->(type) { where(rule_type: type) }
scope :network_rules_legacy, -> { where(rule_type: "network") }
scope :rate_limit_rules_legacy, -> { where(rule_type: "rate_limit") }
scope :path_pattern_rules_legacy, -> { where(rule_type: "path_pattern") }
# Sync queries
scope :since, ->(timestamp) { where("updated_at >= ?", Time.at(timestamp)).order(:updated_at, :id) }
scope :sync_order, -> { order(:updated_at, :id) }
@@ -51,18 +89,19 @@ class Rule < ApplicationRecord
before_validation :set_defaults
before_validation :parse_json_fields
before_save :calculate_priority_for_network_rules
before_save :sync_legacy_columns
# Rule type checks
def network_rule?
rule_type == "network"
waf_rule_type_network?
end
def rate_limit_rule?
rule_type == "rate_limit"
waf_rule_type_rate_limit?
end
def path_pattern_rule?
rule_type == "path_pattern"
waf_rule_type_path_pattern?
end
# Network-specific methods
@@ -104,16 +143,16 @@ class Rule < ApplicationRecord
# Action-specific methods
def redirect_action?
action == "redirect"
waf_action_redirect?
end
def challenge_action?
action == "challenge"
waf_action_challenge?
end
# Redirect/challenge convenience methods
def redirect_url
metadata&.dig('redirect_url')
metadata_hash['redirect_url']
end
def redirect_status
@@ -162,12 +201,13 @@ class Rule < ApplicationRecord
end
def disable!(reason: nil)
new_metadata = metadata_hash.merge(
disabled_at: Time.current.iso8601,
disabled_reason: reason
)
update!(
enabled: false,
metadata: metadata.merge(
disabled_at: Time.current.iso8601,
disabled_reason: reason
)
metadata: new_metadata
)
end
@@ -180,8 +220,8 @@ class Rule < ApplicationRecord
def to_agent_format
format = {
id: id,
rule_type: rule_type,
waf_action: action, # Agents expect 'waf_action' field
waf_rule_type: waf_rule_type,
waf_action: waf_action, # Use the enum field directly
conditions: agent_conditions,
priority: agent_priority,
expires_at: expires_at&.to_i, # Agents expect Unix timestamps
@@ -224,8 +264,8 @@ class Rule < ApplicationRecord
network_range = NetworkRange.find_or_create_by_cidr(cidr, user: user, source: 'user_created')
create!(
rule_type: 'network',
action: action,
waf_rule_type: 'network',
waf_action: action,
network_range: network_range,
user: user,
**options
@@ -237,8 +277,8 @@ class Rule < ApplicationRecord
network_range = NetworkRange.find_or_create_by_cidr(parent_cidr, user: user, source: 'user_created')
block_rule = create!(
rule_type: 'network',
action: 'deny',
waf_rule_type: 'network',
waf_action: 'deny',
network_range: network_range,
source: 'manual:surgical_block',
user: user,
@@ -255,8 +295,8 @@ class Rule < ApplicationRecord
ip_network_range = NetworkRange.find_or_create_by_cidr("#{ip_address}/#{ip_address.include?(':') ? '128' : '32'}", user: user, source: 'user_created')
exception_rule = create!(
rule_type: 'network',
action: 'allow',
waf_rule_type: 'network',
waf_action: 'allow',
network_range: ip_network_range,
source: 'manual:surgical_exception',
user: user,
@@ -277,8 +317,8 @@ class Rule < ApplicationRecord
network_range = NetworkRange.find_or_create_by_cidr(cidr, user: user, source: 'user_created')
create!(
rule_type: 'rate_limit',
action: 'rate_limit',
waf_rule_type: 'rate_limit',
waf_action: 'rate_limit',
network_range: network_range,
conditions: { cidr: cidr, scope: 'ip' },
metadata: {
@@ -307,7 +347,7 @@ class Rule < ApplicationRecord
# This would need efficient IP range queries
# For now, simple IP match
Event.where(ip_address: network_range.network_address)
Event.where("ip_address <<= ?", network_range.cidr)
.recent
.limit(limit)
end
@@ -324,6 +364,18 @@ class Rule < ApplicationRecord
}
end
# Helper method to safely access metadata as a hash.
#
# Always returns a Hash:
#   * a Hash is returned as-is (same object, keys untouched)
#   * a String is parsed as JSON; the result is used only if it is a JSON
#     object. Blank or malformed strings and valid JSON that is not an object
#     (arrays, numbers, ...) yield {}
#   * anything else (including nil) yields {}
#
# Only JSON::ParserError is rescued, so unrelated errors are not hidden.
#
# @return [Hash]
def metadata_hash
  case metadata
  when Hash
    metadata
  when String
    parsed = begin
      JSON.parse(metadata)
    rescue JSON::ParserError
      nil
    end
    # Callers index and merge the result, so anything but a Hash is unusable.
    parsed.is_a?(Hash) ? parsed : {}
  else
    {}
  end
end
private
def set_defaults
@@ -361,7 +413,7 @@ class Rule < ApplicationRecord
end
def validate_conditions_by_type
case rule_type
case waf_rule_type
when "network"
# Network rules don't need conditions in DB - stored in network_range
true
@@ -394,7 +446,7 @@ class Rule < ApplicationRecord
end
def validate_metadata_by_action
case action
case waf_action
when "redirect"
unless metadata&.dig("redirect_url").present?
errors.add(:metadata, "must include 'redirect_url' for redirect action")
@@ -457,4 +509,14 @@ class Rule < ApplicationRecord
self.metadata ||= {}
end
# before_save hook: copy the enum values into the legacy string columns
# (`action`, `rule_type`) for backward compatibility. A blank enum value
# leaves the corresponding legacy column untouched.
def sync_legacy_columns
  self[:action] = waf_action if waf_action.present?
  self[:rule_type] = waf_rule_type if waf_rule_type.present?
end
end