Allow filtering the rules to make finding them easy

This commit is contained in:
Dan Milne
2026-01-18 23:38:07 +11:00
parent e2b6db2f48
commit 24dc355f56
4 changed files with 178 additions and 102 deletions

View File

@@ -132,7 +132,7 @@ class Event < ApplicationRecord
# Use raw SQL to bypass serializer (it expects Array but we're comparing strings)
where("request_segment_ids = ? OR (request_segment_ids >= ? AND request_segment_ids < ?)",
prefix_str, lower_prefix_str, upper_str)
prefix_str, lower_prefix_str, upper_str)
}
# Path depth queries
@@ -145,11 +145,11 @@ class Event < ApplicationRecord
}
# Analytics: Get response time percentiles over different time windows
def self.response_time_percentiles(windows: { hour: 1.hour, day: 1.day, week: 1.week })
def self.response_time_percentiles(windows: {hour: 1.hour, day: 1.day, week: 1.week})
results = {}
windows.each do |label, duration|
scope = where('timestamp >= ?', duration.ago)
scope = where("timestamp >= ?", duration.ago)
stats = scope.pick(
Arel.sql(<<~SQL.squish)
@@ -168,7 +168,7 @@ class Event < ApplicationRecord
count: stats[3]
}
else
{ p50: nil, p95: nil, p99: nil, count: 0 }
{p50: nil, p95: nil, p99: nil, count: 0}
end
end
@@ -184,7 +184,7 @@ class Event < ApplicationRecord
return request_path if request_segment_ids.blank?
segments = PathSegment.where(id: request_segment_ids).index_by(&:id)
'/' + request_segment_ids.map { |id| segments[id]&.segment }.compact.join('/')
"/" + request_segment_ids.map { |id| segments[id]&.segment }.compact.join("/")
end
# Extract key fields from payload before saving
@@ -370,19 +370,19 @@ class Event < ApplicationRecord
end
# Predicate: true when this event's waf_action equals "deny".
def blocked?
waf_action == 'deny' # deny = 0
waf_action == "deny" # deny = 0
end
# Predicate: true when this event's waf_action equals "allow".
def allowed?
waf_action == 'allow' # allow = 1
waf_action == "allow" # allow = 1
end
# Predicate: true when this event's waf_action equals "log".
def logged?
waf_action == 'log'
waf_action == "log"
end
# Predicate: true when this event's waf_action equals "challenge".
def challenged?
waf_action == 'challenge'
waf_action == "challenge"
end
def rule_matched?
@@ -392,7 +392,7 @@ class Event < ApplicationRecord
# New path methods for normalization
# Splits request_path on "/" and discards blank pieces (such as the empty
# string produced by a leading slash).
# Returns an empty array when request_path is not present.
def path_segments
return [] unless request_path.present?
request_path.split('/').reject(&:blank?)
request_path.split("/").reject(&:blank?)
end
def path_segments_array
@@ -401,7 +401,11 @@ class Event < ApplicationRecord
# Returns the hostname component parsed from request_url.
# Returns nil when request_url is not present, or when parsing raises a
# StandardError (the rescue is deliberately best-effort: a bad URL yields nil
# rather than an exception).
def request_hostname
return nil unless request_url.present?
URI.parse(request_url).hostname rescue nil
begin
URI.parse(request_url).hostname
rescue
nil
end
end
# Tag helper methods
@@ -420,7 +424,7 @@ class Event < ApplicationRecord
end
# Returns the event's tags joined into one string, separated by ", ".
# NOTE(review): presumably `tags` is an Array of strings — confirm against the
# attribute definition, which is not visible here.
def tag_list
tags.join(', ')
tags.join(", ")
end
# Normalize headers to lower case keys during import phase
@@ -510,10 +514,10 @@ class Event < ApplicationRecord
# Find rules for those ranges, ordered by priority (most specific first)
Rule.network_rules
.where(network_range_id: range_ids)
.enabled
.includes(:network_range)
.order('masklen(network_ranges.network) DESC')
.where(network_range_id: range_ids)
.enabled
.includes(:network_range)
.order("masklen(network_ranges.network) DESC")
end
def active_blocking_rules
@@ -586,9 +590,9 @@ class Event < ApplicationRecord
# Find most specific network range with actual GeoIP data
# This might be more specific (e.g., /25) or broader (e.g., /22) than the /24
data_range = NetworkRange.where("network >>= ?", ip_string)
.where.not(country: nil) # Must have actual data
.order(Arel.sql("masklen(network) DESC"))
.first
.where.not(country: nil) # Must have actual data
.order(Arel.sql("masklen(network) DESC"))
.first
# Use the most specific range with data, or fall back to tracking network
range = data_range || tracking_network
@@ -645,9 +649,13 @@ class Event < ApplicationRecord
# Find or create the tracking network
NetworkRange.find_or_create_by!(network: network_cidr) do |nr|
nr.source = 'auto_generated'
nr.creation_reason = 'tracking unit for IPAPI deduplication'
nr.is_datacenter = NetworkRangeGenerator.datacenter_ip?(ip_addr) rescue false
nr.source = "auto_generated"
nr.creation_reason = "tracking unit for IPAPI deduplication"
nr.is_datacenter = begin
NetworkRangeGenerator.datacenter_ip?(ip_addr)
rescue
false
end
nr.is_vpn = false
nr.is_proxy = false
end
@@ -663,17 +671,17 @@ class Event < ApplicationRecord
# Private and reserved ranges
[
IPAddr.new('10.0.0.0/8'),
IPAddr.new('172.16.0.0/12'),
IPAddr.new('192.168.0.0/16'),
IPAddr.new('127.0.0.0/8'),
IPAddr.new('169.254.0.0/16'),
IPAddr.new('224.0.0.0/4'),
IPAddr.new('240.0.0.0/4'),
IPAddr.new('::1/128'),
IPAddr.new('fc00::/7'),
IPAddr.new('fe80::/10'),
IPAddr.new('ff00::/8')
IPAddr.new("10.0.0.0/8"),
IPAddr.new("172.16.0.0/12"),
IPAddr.new("192.168.0.0/16"),
IPAddr.new("127.0.0.0/8"),
IPAddr.new("169.254.0.0/16"),
IPAddr.new("224.0.0.0/4"),
IPAddr.new("240.0.0.0/4"),
IPAddr.new("::1/128"),
IPAddr.new("fc00::/7"),
IPAddr.new("fe80::/10"),
IPAddr.new("ff00::/8")
].any? { |range| range.include?(ip) }
rescue IPAddr::InvalidAddressError
true # Treat invalid IPs as "reserved"
@@ -711,7 +719,6 @@ class Event < ApplicationRecord
self.server_name = payload["server_name"]
self.environment = payload["environment"]
# Extract agent info
agent_data = payload.dig("agent") || {}
self.agent_version = agent_data["version"]
@@ -742,7 +749,7 @@ class Event < ApplicationRecord
detector = DeviceDetector.new(user_agent)
if detector.bot?
# Add bot tag with specific bot name
bot_name = detector.bot_name&.downcase&.gsub(/\s+/, '_') || 'unknown'
bot_name = detector.bot_name&.downcase&.gsub(/\s+/, "_") || "unknown"
add_tag("bot:#{bot_name}")
return true
end
@@ -756,23 +763,23 @@ class Event < ApplicationRecord
range = NetworkRange.find_by(id: network_range_id)
if range
# Check if the network range source indicates a bot import
if range.source&.start_with?('bot_import_')
if range.source&.start_with?("bot_import_")
# Extract bot type from source (e.g., 'bot_import_googlebot' -> 'googlebot')
bot_type = range.source.sub('bot_import_', '')
bot_type = range.source.sub("bot_import_", "")
add_tag("bot:#{bot_type}")
add_tag("network:#{range.company&.downcase&.gsub(/\s+/, '_')}") if range.company.present?
add_tag("network:#{range.company&.downcase&.gsub(/\s+/, "_")}") if range.company.present?
return true
end
# Check if the company is a known bot provider (from bot imports)
# Common bot companies: Google, Amazon, OpenAI, Cloudflare, Microsoft, etc.
known_bot_companies = ['googlebot', 'google bot', 'amazon', 'aws', 'openai',
'anthropic', 'cloudflare', 'microsoft', 'facebook',
'meta', 'apple', 'duckduckgo']
known_bot_companies = ["googlebot", "google bot", "amazon", "aws", "openai",
"anthropic", "cloudflare", "microsoft", "facebook",
"meta", "apple", "duckduckgo"]
company_lower = company&.downcase
if company_lower && known_bot_companies.any? { |bot| company_lower.include?(bot) }
add_tag("bot:#{company_lower.gsub(/\s+/, '_')}")
add_tag("network:#{company_lower.gsub(/\s+/, '_')}")
add_tag("bot:#{company_lower.gsub(/\s+/, "_")}")
add_tag("network:#{company_lower.gsub(/\s+/, "_")}")
return true
end
end
@@ -784,7 +791,7 @@ class Event < ApplicationRecord
if is_datacenter && user_agent.present?
# Generic/common bot user agents in datacenter networks
ua_lower = user_agent.downcase
bot_keywords = ['bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python', 'go-http-client']
bot_keywords = ["bot", "crawler", "spider", "scraper", "curl", "wget", "python", "go-http-client"]
if bot_keywords.any? { |keyword| ua_lower.include?(keyword) }
add_tag("bot:datacenter")
add_tag("datacenter:true")