Many updates
This commit is contained in:
@@ -4,6 +4,10 @@ class Event < ApplicationRecord
|
||||
# Normalized association for hosts (most valuable compression)
|
||||
belongs_to :request_host, optional: true
|
||||
|
||||
# WAF rule associations
|
||||
belongs_to :rule, optional: true
|
||||
has_one :waf_policy, through: :rule
|
||||
|
||||
# Enums for fixed value sets
|
||||
enum :waf_action, {
|
||||
allow: 0, # allow/pass
|
||||
@@ -29,7 +33,7 @@ class Event < ApplicationRecord
|
||||
# This provides direct array access and efficient indexing
|
||||
attribute :tags, :json, default: -> { [] }
|
||||
|
||||
validates :event_id, presence: true, uniqueness: true
|
||||
validates :request_id, presence: true, uniqueness: true
|
||||
validates :timestamp, presence: true
|
||||
|
||||
scope :recent, -> { order(timestamp: :desc) }
|
||||
@@ -55,32 +59,42 @@ class Event < ApplicationRecord
|
||||
where("tags @> ARRAY[?]", tag_array)
|
||||
}
|
||||
|
||||
# Network-based filtering scopes
|
||||
# Network-based filtering scopes - now using denormalized columns
|
||||
# Case-insensitive substring match on the denormalized company column.
# No join against network_ranges is needed any more.
# LIKE wildcards (% and _) inside the argument are escaped so that they
# match literally instead of widening the search.
scope :by_company, ->(company) {
  where("company ILIKE ?", "%#{sanitize_sql_like(company.to_s)}%")
}
|
||||
|
||||
# Restricts events to those whose country column equals the given value.
scope :by_country, ->(country) { where(country: country) }
|
||||
|
||||
# Filters events by network classification using the denormalized boolean
# columns on events (no join). The type is compared case-insensitively and
# may be a String or Symbol; an unrecognised type yields an empty relation.
scope :by_network_type, ->(type) {
  case type.to_s.downcase
  when "datacenter" then where(is_datacenter: true)
  when "vpn"        then where(is_vpn: true)
  when "proxy"      then where(is_proxy: true)
  # "standard" means none of the three flags is set
  when "standard"   then where(is_datacenter: false, is_vpn: false, is_proxy: false)
  else none
  end
}
|
||||
|
||||
# Filters events by autonomous system number using the denormalized asn
# column. The argument is coerced with to_i, so a non-numeric value
# becomes 0 rather than raising.
scope :by_asn, ->(asn) {
  where(asn: asn.to_i)
}
|
||||
|
||||
# Filters events whose associated network range has exactly the given CIDR.
# This still requires a join because the CIDR itself lives on network_ranges;
# the join goes through the network_range association.
scope :by_network_cidr, ->(cidr) {
  joins(:network_range).where("network_ranges.network = ?", cidr)
}
|
||||
|
||||
# Association backing the optional network_range_id column (may be unset, hence optional: true)
|
||||
belongs_to :network_range, optional: true
|
||||
|
||||
# Path prefix matching using range queries (uses B-tree index efficiently)
|
||||
scope :with_path_prefix, ->(prefix_segment_ids) {
|
||||
return none if prefix_segment_ids.blank?
|
||||
@@ -130,13 +144,39 @@ class Event < ApplicationRecord
|
||||
# Normalize event fields after extraction
|
||||
after_validation :normalize_event_fields, if: :should_normalize?
|
||||
|
||||
def self.create_from_waf_payload!(event_id, payload)
|
||||
# Populate network intelligence from IP address
|
||||
before_save :populate_network_intelligence, if: :should_populate_network_intelligence?
|
||||
|
||||
# Backfill network intelligence for all events
|
||||
# Backfills network intelligence for every event whose country is still nil.
#
# Each record is re-saved so that the before_save callback can populate the
# denormalized columns. Progress is printed after every batch. Records whose
# save returns false are tallied and reported instead of being silently
# counted as done.
#
# batch_size - number of records loaded per batch (default 10_000)
#
# Returns the number of records visited (0 when nothing is pending).
def self.backfill_network_intelligence!(batch_size: 10_000)
  pending = where(country: nil)
  total = pending.count
  return 0 if total.zero?

  puts "Backfilling network intelligence for #{total} events..."
  processed = 0
  failed = 0

  pending.find_in_batches(batch_size: batch_size) do |batch|
    # save triggers the before_save callback; false means it was not persisted
    failed += batch.count { |event| !event.save }
    processed += batch.size
    puts " Processed #{processed}/#{total} (#{(processed.to_f / total * 100).round(1)}%)"
  end

  puts " #{failed} of #{processed} events failed to save" if failed.positive?
  processed
end
|
||||
|
||||
# Backfill network intelligence for a specific event
|
||||
# Recomputes the network intelligence fields for this single event and
# persists the record with save!.
def backfill_network_intelligence!
  populate_network_intelligence
  save!
end
|
||||
|
||||
def self.create_from_waf_payload!(request_id, payload)
|
||||
# Normalize headers in payload during import phase
|
||||
normalized_payload = normalize_payload_headers(payload)
|
||||
|
||||
# Create the WAF request event
|
||||
create!(
|
||||
event_id: event_id,
|
||||
request_id: request_id,
|
||||
timestamp: parse_timestamp(normalized_payload["timestamp"]),
|
||||
payload: normalized_payload,
|
||||
|
||||
@@ -150,7 +190,8 @@ class Event < ApplicationRecord
|
||||
response_status: normalized_payload.dig("response", "status_code"),
|
||||
response_time_ms: normalized_payload.dig("response", "duration_ms"),
|
||||
waf_action: normalize_action(normalized_payload["waf_action"]), # Normalize incoming action values
|
||||
rule_matched: normalized_payload["rule_matched"],
|
||||
# Support both new (rule_id) and old (rule_matched) field names during cutover
|
||||
rule_id: normalized_payload["rule_id"] || normalized_payload["rule_matched"],
|
||||
blocked_reason: normalized_payload["blocked_reason"],
|
||||
|
||||
# Server/Environment info
|
||||
@@ -283,7 +324,7 @@ class Event < ApplicationRecord
|
||||
end
|
||||
|
||||
# True when this event carries a rule_id.
# Only rule_id is consulted; the legacy rule_matched field is no longer read.
def rule_matched?
  rule_id.present?
end
|
||||
|
||||
# New path methods for normalization
|
||||
@@ -343,40 +384,39 @@ class Event < ApplicationRecord
|
||||
end
|
||||
|
||||
# Returns the most specific network range for this event, or nil.
#
# The cached network_range_id is tried first (a single primary-key lookup).
# The expensive matching_network_ranges lookup only runs when no id is
# cached or the cached id no longer resolves to a record.
def most_specific_range
  if network_range_id.present?
    cached = NetworkRange.find_by(id: network_range_id)
    return cached if cached
  end

  # Fallback: the first match is taken as the most specific one
  matching_network_ranges.first&.dig(:range)
end
|
||||
|
||||
# Returns the :range entry of the last element of matching_network_ranges,
# or nil when there is no match. The lookup runs exactly once per call.
def broadest_range
  matching_network_ranges.last&.dig(:range)
end
|
||||
|
||||
# Returns the event's network intelligence as a symbol-keyed Hash.
# It is assembled purely from the denormalized attributes on the event,
# so no network range lookup is performed.
def network_intelligence
  {
    country: country,
    company: company,
    asn: asn,
    asn_org: asn_org,
    is_datacenter: is_datacenter,
    is_vpn: is_vpn,
    is_proxy: is_proxy
  }
end
|
||||
|
||||
# Company name read straight from the event's own attribute.
# It must not go through network_intelligence: that hash is built by
# calling this reader, so delegating to it would recurse without end.
def company
  self[:company]
end
|
||||
|
||||
# ASN read straight from the event's own attribute.
# It must not go through network_intelligence: that hash is built by
# calling this reader, so delegating to it would recurse without end.
def asn
  self[:asn]
end
|
||||
|
||||
# ASN organisation read straight from the event's own attribute.
# It must not go through network_intelligence: that hash is built by
# calling this reader, so delegating to it would recurse without end.
def asn_org
  self[:asn_org]
end
|
||||
|
||||
# Whether the event's is_datacenter attribute is set; nil counts as false.
# Reads the attribute directly instead of going through network_intelligence.
def is_datacenter?
  self[:is_datacenter] || false
end
|
||||
|
||||
# Whether the event's is_proxy attribute is set; nil counts as false.
# Reads the attribute directly instead of going through network_intelligence.
def is_proxy?
  self[:is_proxy] || false
end
|
||||
|
||||
# Whether the event's is_vpn attribute is set; nil counts as false.
# Reads the attribute directly instead of going through network_intelligence.
def is_vpn?
  self[:is_vpn] || false
end
|
||||
# Denormalized attribute accessors - these now use the columns directly
|
||||
# No need to override - Rails provides these automatically:
|
||||
# - country (column)
|
||||
# - company (column)
|
||||
# - asn (column)
|
||||
# - asn_org (column)
|
||||
# - is_datacenter (column)
|
||||
# - is_vpn (column)
|
||||
# - is_proxy (column)
|
||||
|
||||
# IP validation
|
||||
def valid_ipv4?
|
||||
@@ -480,7 +520,8 @@ class Event < ApplicationRecord
|
||||
self.request_url = request_data["url"]
|
||||
self.response_status = response_data["status_code"]
|
||||
self.response_time_ms = response_data["duration_ms"]
|
||||
self.rule_matched = payload["rule_matched"]
|
||||
# Support both new (rule_id) and old (rule_matched) field names during cutover
|
||||
self.rule_id = payload["rule_id"] || payload["rule_matched"]
|
||||
self.blocked_reason = payload["blocked_reason"]
|
||||
|
||||
# Store original values for normalization only if they don't exist yet
|
||||
|
||||
Reference in New Issue
Block a user