599 lines
19 KiB
Ruby
599 lines
19 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# NetworkRange - Unified IPv4/IPv6 network range management
|
|
#
|
|
# Uses PostgreSQL's inet type to handle both IPv4 and IPv6 networks seamlessly.
|
|
# Provides network intelligence data including ASN, company, geographic info,
|
|
# and classification flags (datacenter, proxy, VPN).
|
|
class NetworkRange < ApplicationRecord
|
|
# Values accepted for the `source` attribute (checked by the inclusion
# validation on `source`). Grouped by origin: generic, GeoLite, bot imports.
SOURCES = %w[
  api_imported user_created manual auto_generated inherited
  geolite_asn geolite_country
  bot_import_amazon_aws bot_import_google bot_import_microsoft_bing bot_import_anthropic
  bot_import_openai_searchbot bot_import_openai_chatgpt_user bot_import_openai_gptbot
  bot_import_cloudflare bot_import_facebook bot_import_applebot bot_import_duckduckgo
  production_import
].freeze
|
|
|
|
# Associations
# NOTE: the `events` reader generated by has_many is replaced by the explicit
# `events` method defined further down in this class (CIDR-based lookup);
# the association is still what `joins(:events)` in the scopes refers to.
has_many :rules, dependent: :destroy
has_many :events, foreign_key: :network_range_id, dependent: :nullify
belongs_to :user, optional: true

# Validations
# `network` must be present and unique; `source` must be one of SOURCES;
# `asn`, when given, must be a positive number.
validates :network, presence: true, uniqueness: true
validates :source, inclusion: { in: SOURCES }
validates :asn, numericality: { greater_than: 0 }, allow_blank: true
|
|
|
|
# Scopes
# Address-family filters (PostgreSQL family() on the network column)
scope :ipv4, -> { where('family(network) = 4') }
scope :ipv6, -> { where('family(network) = 6') }

# Simple attribute filters
scope :by_country, ->(country) { where(country: country) }
scope :by_company, ->(company) { where(company: company) }
scope :by_asn, ->(asn) { where(asn: asn) }

# Classification flags
scope :datacenter, -> { where(is_datacenter: true) }
scope :proxy, -> { where(is_proxy: true) }
scope :vpn, -> { where(is_vpn: true) }

# Filters on the `source` column
scope :user_created, -> { where(source: 'user_created') }
scope :api_imported, -> { where(source: 'api_imported') }
scope :geolite_imported, -> { where(source: %w[geolite_asn geolite_country]) }
scope :geolite_asn, -> { where(source: 'geolite_asn') }
scope :geolite_country, -> { where(source: 'geolite_country') }

# Event-based filters (use the has_many :events association)
scope :with_events, -> { joins(:events).distinct }
scope :most_active, -> { joins(:events).group('network_ranges.id').order('COUNT(events.id) DESC') }

# Callbacks
before_validation :set_default_source
# after_save :update_children_inheritance!, if: :should_update_children_inheritance? # Disabled for now
|
|
|
|
# Virtual attribute for CIDR notation.
#
# Returns the full "address/prefix" string. `network.to_s` on its own omits
# the prefix, so the string is rebuilt from the address and the prefix length.
# (A second, earlier `cidr` definition that returned only `network.to_s` was
# shadowed by this one and has been removed.)
#
# @return [String] e.g. "192.168.1.0/24"
def cidr
  "#{network_address}/#{prefix_length}"
end

# Assigns the network from a CIDR string.
#
# @param new_cidr [String] value handed to the `network` attribute writer
def cidr=(new_cidr)
  self.network = new_cidr
end

# Network properties

# Prefix length (number of mask bits) as reported by the IPAddr in `network`.
#
# @return [Integer]
def prefix_length
  network.prefix
end

# Address portion of the network, without the prefix.
#
# @return [String]
def network_address
  network.to_s
end
|
|
|
|
# Broadcast address of this network, computed by PostgreSQL's broadcast().
#
# The full CIDR is passed as a bound value: `network.to_s` has no prefix, so
# PostgreSQL would treat it as a single host and simply echo the address back.
# Binding the value also avoids building SQL by string interpolation.
#
# @return [Object, nil] first column of the first result row
def broadcast_address
  sql = self.class.sanitize_sql_array(["SELECT broadcast(?::inet)", cidr])
  self.class.connection.select_value(sql)
end
|
|
|
|
# Address family of the network, derived from the textual address:
# a colon in the address means IPv6, otherwise IPv4.
#
# @return [Integer] 6 or 4
def family
  address_part = network.to_s.split('/').first
  return 6 if address_part.include?(':')

  4
end
|
|
|
|
# A "virtual" network is an instance that has not been persisted.
#
# @return [Boolean]
def virtual?
  return false if persisted?

  true
end
|
|
|
|
# @return [Boolean] true when `family` reports 4
def ipv4?
  family == 4
end

# @return [Boolean] true when `family` reports 6
def ipv6?
  family == 6
end
|
|
|
|
# Network containment and overlap operations

# Whether THIS network contains the given IP address.
#
# The check is done in memory against this record's own CIDR. The previous
# implementation queried the whole table and therefore returned true whenever
# any stored range contained the address. It also works for unsaved (virtual)
# networks.
#
# @param ip_string [String, #to_s] IP address to test
# @return [Boolean] false for invalid input or an address of another family
def contains_ip?(ip_string)
  IPAddr.new(cidr).include?(IPAddr.new(ip_string.to_s))
rescue => e
  Rails.logger.error "Error checking IP containment: #{e.message}"
  false
end
|
|
|
|
# Whether this network fully contains the network described by `other_cidr`
# (an identical network counts as contained).
#
# The own range is built from `cidr`: `IPAddr.new` cannot be given an IPAddr
# without an explicit family, so `IPAddr.new(network)` raised an error that the
# rescue below did not cover. Containment is decided by comparing range bounds,
# so the result does not depend on how the installed ipaddr version treats a
# network argument in `include?`.
#
# @param other_cidr [String] CIDR (or single address) to test
# @return [Boolean] false for invalid input or a different address family
def contains_network?(other_cidr)
  own_block = IPAddr.new(cidr)
  other_block = IPAddr.new(other_cidr)
  return false unless own_block.family == other_block.family

  own_span = own_block.to_range
  other_span = other_block.to_range
  own_span.begin <= other_span.begin && other_span.end <= own_span.end
rescue IPAddr::Error
  false
end
|
|
|
|
# Whether this network and `other_cidr` share any addresses.
#
# Two CIDR blocks are either nested or disjoint, so checking containment in
# both directions is sufficient. The own range is built from `cidr` because
# `IPAddr.new` rejects an IPAddr argument without an explicit family.
#
# @param other_cidr [String] CIDR (or single address) to test
# @return [Boolean] false for invalid input or a different address family
def overlaps?(other_cidr)
  own_block = IPAddr.new(cidr)
  other_block = IPAddr.new(other_cidr)
  own_block.include?(other_block) || other_block.include?(own_block)
rescue IPAddr::Error
  false
end
|
|
|
|
# Parent/child relationships

# Stored networks that strictly contain this one (shorter prefix), ordered
# with the most specific parent first.
# The full CIDR is bound (not `network.to_s`) so the mask is preserved.
#
# @return [ActiveRecord::Relation]
def parent_ranges
  containing = NetworkRange.where("?::inet << network", cidr)
  containing.order("masklen(network) DESC")
end
|
|
|
|
# Stored networks strictly contained by this one (longer prefix), ordered
# with the least specific child first.
# The full CIDR is bound (not `network.to_s`) so the mask is preserved.
#
# @return [ActiveRecord::Relation]
def child_ranges
  contained = NetworkRange.where("?::inet >> network", cidr)
  contained.order("masklen(network) ASC")
end
|
|
|
|
# Find (or build) the ancestor network at a specific prefix length.
# For example, given 192.168.1.0/24 and prefix 16, returns 192.168.0.0/16.
#
# The ancestor CIDR is computed in Ruby with IPAddr#mask instead of
# interpolating values into a `set_masklen` SQL string: this zeroes the host
# bits (as in the example above), needs no database round trip, and cannot be
# used for SQL injection through `target_prefix`.
#
# @param target_prefix [Integer, String] desired prefix length
# @return [NetworkRange] self when this network is already at or above the
#   target; otherwise the stored ancestor, or an UNSAVED NetworkRange with
#   source 'inherited' (the caller decides whether to persist it)
def find_or_create_ancestor_at_prefix(target_prefix)
  target_prefix = Integer(target_prefix)
  return self if prefix_length <= target_prefix

  ancestor_cidr = "#{IPAddr.new(network.to_s).mask(target_prefix)}/#{target_prefix}"

  NetworkRange.find_by(network: ancestor_cidr) ||
    NetworkRange.new(network: ancestor_cidr, source: 'inherited')
end
|
|
|
|
# Nearest (most specific) stored parent that carries intelligence data.
#
# Candidates are networks containing this network's address with a strictly
# shorter prefix and at least one of: asn, company, country, or a true
# datacenter/vpn/proxy flag.
#
# @return [NetworkRange, nil]
def parent_with_intelligence
  has_intelligence_sql =
    "(asn IS NOT NULL OR company IS NOT NULL OR country IS NOT NULL OR is_datacenter = true OR is_vpn = true OR is_proxy = true)"

  NetworkRange
    .where("?::inet <<= network", network.to_s)
    .where("masklen(network) < ?", prefix_length)
    .where(has_intelligence_sql)
    .order("masklen(network) DESC")
    .first
end
|
|
|
|
# Whether this network, or any of its parent ranges, has IPAPI data stored.
#
# @return [Boolean]
def has_ipapi_data_available?
  has_network_data_from?(:ipapi) ||
    parent_ranges.any? { |ancestor| ancestor.has_network_data_from?(:ipapi) }
end
|
|
|
|
# Generic API fetching status management

# Whether a fetch for `source` was started within the last 5 minutes,
# according to network_data['fetching_status'][source]['started_at'].
#
# @param source [Symbol, String] data source name (e.g. :ipapi)
# @return [Boolean, nil] nil when no status / start time is recorded
def is_fetching_api_data?(source)
  status_by_source = network_data&.dig('fetching_status') || {}
  entry = status_by_source[source.to_s]
  started_at = entry && entry['started_at']
  started_at && started_at > 5.minutes.ago.to_f
end
|
|
|
|
# Records that a fetch for `source` has started, unless one is already running.
#
# Runs inside a transaction and takes a row lock (`lock!` reloads the record)
# so concurrent jobs cannot both claim the fetch. The result is produced as
# the block's value rather than via `return` inside the transaction block:
# leaving a transaction block with a non-local return has had different
# commit/rollback behaviour across Rails versions.
#
# @param source [Symbol, String] data source name (e.g. :ipapi)
# @return [Boolean] true when the fetch was claimed, false when another
#   fetch is already in progress
def mark_as_fetching_api_data!(source)
  transaction do
    # Reload with lock to get fresh data
    lock!

    # Double-check that we're not already fetching
    if is_fetching_api_data?(source)
      Rails.logger.info "Another job already started fetching #{source} for #{cidr}"
      false
    else
      self.network_data ||= {}
      self.network_data['fetching_status'] ||= {}
      self.network_data['fetching_status'][source.to_s] = {
        'started_at' => Time.current.to_f,
        'job_id' => SecureRandom.hex(8)
      }
      save!
      true
    end
  end
end
|
|
|
|
# Removes the fetching status entry for `source` and saves the record.
# Does nothing (and does not save) when no such entry exists. An emptied
# 'fetching_status' hash is removed from network_data altogether.
#
# @param source [Symbol, String] data source name
def clear_fetching_status!(source)
  key = source.to_s
  return unless network_data&.dig('fetching_status')&.dig(key)

  statuses = network_data['fetching_status']
  statuses.delete(key)
  # Clean up empty fetching_status hash
  network_data.delete('fetching_status') if statuses.empty?
  save!
end
|
|
|
|
# Whether API data for `source` should be fetched: true only when no data
# from that source is stored AND no fetch is currently in progress.
#
# The previous guard built the string "has_network_data_from?(<source>)" and
# passed it to respond_to?/send; no method has that name, so the stored-data
# check was silently skipped. The predicate is now called directly.
#
# @param source [Symbol, String] data source name (e.g. :ipapi)
# @return [Boolean]
def should_fetch_api_data?(source)
  !has_network_data_from?(source) && !is_fetching_api_data?(source)
end
|
|
|
|
# Whether IPAPI data should be fetched for this network: false when data is
# already available (here or on a parent), when this network is currently
# fetching, or when any parent range is currently fetching.
#
# @return [Boolean]
def should_fetch_ipapi_data?
  if has_ipapi_data_available? || is_fetching_api_data?(:ipapi)
    false
  else
    # Also make sure no parent is currently fetching IPAPI data
    parent_ranges.none? { |ancestor| ancestor.is_fetching_api_data?(:ipapi) }
  end
end
|
|
|
|
# Intelligence for this network: its own data when it has any, otherwise the
# data of the nearest parent with intelligence, flagged `inherited: true` and
# annotated with that parent's CIDR. Empty hash when neither exists.
#
# @return [Hash]
def inherited_intelligence
  return own_intelligence if has_intelligence?

  ancestor = parent_with_intelligence
  return {} unless ancestor

  ancestor.own_intelligence.merge(inherited: true, parent_cidr: ancestor.cidr)
end
|
|
|
|
# Whether this record itself carries any intelligence: an ASN, company or
# country value, or any of the datacenter/proxy/VPN flags.
#
# @return [Boolean]
def has_intelligence?
  return true if [asn, company, country].any?(&:present?)

  is_datacenter? || is_proxy? || is_vpn?
end
|
|
|
|
# Snapshot of this record's own intelligence attributes.
# `inherited` is always false here; `source` is the record's source value.
#
# @return [Hash{Symbol => Object}]
def own_intelligence
  identity = { asn: asn, asn_org: asn_org, company: company, country: country }
  flags = { is_datacenter: is_datacenter, is_proxy: is_proxy, is_vpn: is_vpn }

  identity.merge(flags).merge(inherited: false, source: source)
end
|
|
|
|
# Tally of user agents seen for this network (cached for 5 minutes).
#
# For persisted networks with events, DuckDB is tried first (including child
# ranges). When DuckDB is unavailable or fails (helper returns nil), the tally
# now really falls back to the PostgreSQL CIDR query, as the helper's log
# message promises; previously an empty hash was returned in that case.
#
# Unsaved records all share the same Rails `cache_key` (".../new"), so
# virtual networks are cached under a key derived from their CIDR to avoid
# returning another network's tally.
#
# @return [Hash] user agent => count
def agent_tally
  tally_key =
    if persisted?
      "#{cache_key}:agent_tally"
    else
      "network_ranges/virtual/#{cidr}:agent_tally"
    end

  Rails.cache.fetch(tally_key, expires_in: 5.minutes) do
    duckdb_tally = nil

    if persisted? && has_events?
      # Include child network ranges to capture all traffic within this network block
      network_ids = [id] + child_ranges.pluck(:id)
      duckdb_tally = with_duckdb_fallback { EventDdb.network_agent_tally(network_ids) }
    end

    # PostgreSQL fallback (also used for virtual networks).
    # NOTE(review): this loads every matching event; if `user_agent` is a plain
    # column, `events.group(:user_agent).count` would avoid that - confirm.
    duckdb_tally || events.map(&:user_agent).tally
  end
end
|
|
|
|
# Runs the given block (a DuckDB query) and returns its result.
# Any StandardError is logged as a warning and turned into nil, which callers
# treat as "fall back to PostgreSQL".
#
# @yieldreturn [Object, nil] query result
# @return [Object, nil] the block's result, or nil on failure
def with_duckdb_fallback(&block)
  yield
rescue StandardError => e
  Rails.logger.warn "[NetworkRange] DuckDB query failed, falling back to PostgreSQL: #{e.message}"
  nil
end
|
|
|
|
# Geographic lookup
#
# Fills in `country` via GeoIpService using the network's base address as the
# sample IP. Skipped when a country is already set; lookup or save errors are
# logged and swallowed.
def geo_lookup_country!
  return if country.present?

  looked_up_country = GeoIpService.lookup_country(network_address)
  update!(country: looked_up_country) if looked_up_country.present?
rescue => e
  Rails.logger.error "Failed to lookup geo location for network range #{cidr}: #{e.message}"
end
|
|
|
|
# Class methods for network operations

# Stored networks that contain (or equal) the given IP, most specific first.
#
# @param ip_string [String]
# @return [ActiveRecord::Relation]
def self.contains_ip(ip_string)
  matching = where("network >>= ?", ip_string)
  matching.order("masklen(network) DESC")
end
|
|
|
|
# Stored networks that overlap the given CIDR (PostgreSQL `&&` operator).
#
# @param range_cidr [String]
# @return [ActiveRecord::Relation]
def self.overlapping(range_cidr)
  overlap_condition = "network && ?"
  where(overlap_condition, range_cidr)
end
|
|
|
|
# Looks up a network by its URL-style identifier, where "/" is written as "_"
# (the inverse of `to_param`).
#
# A bare address without a prefix is widened to the tracking granularity used
# elsewhere in this class: /24 for IPv4 and /64 for IPv6 (previously /24 was
# appended to IPv6 addresses as well). nil or empty input returns nil instead
# of raising.
#
# @param cidr [String, nil] e.g. "192.168.1.0_24", "192.168.1.0/24", "192.168.1.7"
# @return [NetworkRange, nil]
def self.findd(cidr)
  normalized = cidr.to_s.strip.tr('_', '/')
  return nil if normalized.empty?

  unless normalized.include?('/')
    default_prefix = normalized.include?(':') ? 64 : 24
    normalized = "#{normalized}/#{default_prefix}"
  end

  find_by(network: normalized)
end
|
|
|
|
# Finds the range with the given CIDR or creates it. The keyword values are
# applied only when a new record is created.
#
# @param cidr [String]
# @param user [Object, nil] owner of a newly created range
# @param source [String, nil] defaults to 'user_created' for new records
# @param reason [String, nil] stored as creation_reason
# @return [NetworkRange]
def self.find_or_create_by_cidr(cidr, user: nil, source: nil, reason: nil)
  find_or_create_by(network: cidr) do |new_range|
    new_range.user = user
    new_range.source = source || 'user_created'
    new_range.creation_reason = reason
  end
end
|
|
|
|
# Finds the range with the given CIDR or creates it with `attributes`.
# Attributes are only assigned when a new record is created.
#
# @param cidr [String]
# @param attributes [Hash] attributes for a newly created range
# @return [NetworkRange]
def self.import_from_cidr(cidr, **attributes)
  find_or_create_by(network: cidr) { |new_range| new_range.assign_attributes(attributes) }
end
|
|
|
|
# Convenience methods for JSON fields

# `abuser_scores` parsed from JSON; empty hash when unset or unparseable.
#
# @return [Object] parsed JSON (normally a Hash)
def abuser_scores_hash
  return {} unless abuser_scores

  JSON.parse(abuser_scores)
rescue JSON::ParserError
  {}
end

# Stores the given hash in `abuser_scores` as a JSON string.
def abuser_scores_hash=(hash)
  self.abuser_scores = hash.to_json
end
|
|
|
|
# `additional_data` parsed from JSON; empty hash when unset or unparseable.
#
# @return [Object] parsed JSON (normally a Hash)
def additional_data_hash
  return {} unless additional_data

  JSON.parse(additional_data)
rescue JSON::ParserError
  {}
end

# Stores the given hash in `additional_data` as a JSON string.
def additional_data_hash=(hash)
  self.additional_data = hash.to_json
end
|
|
|
|
# Network data accessors for different data sources
# network_data is a JSONB column with namespaced data:
# {
#   geolite: {...},    # MaxMind GeoLite2 data
#   ipapi: {...},      # IPAPI.is enrichment data
#   abuseipdb: {...},  # Future: AbuseIPDB data
#   shodan: {...}      # Future: Shodan data
# }

# Data stored under the given source namespace, or an empty hash.
#
# @param source [Symbol, String]
# @return [Object] stored value (normally a Hash)
def network_data_for(source)
  stored = network_data&.dig(source.to_s)
  stored || {}
end
|
|
|
|
# Stores `data` under the given source namespace in network_data
# (initialising network_data to an empty hash first). Does not save.
#
# @param source [Symbol, String]
# @param data [Object]
# @return [Object] the stored data
def set_network_data(source, data)
  self.network_data = {} unless network_data
  network_data[source.to_s] = data
end
|
|
|
|
# Whether network_data holds a non-blank entry for the given source.
#
# @param source [Symbol, String]
# @return [Boolean, nil] nil when network_data itself is nil
def has_network_data_from?(source)
  key = source.to_s
  network_data&.key?(key) && network_data[key].present?
end
|
|
|
|
# IPAPI tracking at /24 granularity

# Finds or creates the tracking network for an IP address: the enclosing /24
# for IPv4, the enclosing /64 for IPv6. New records get source
# 'auto_generated'.
#
# @param ip_address [String, #to_s]
# @return [NetworkRange]
def self.find_or_create_tracking_network_for_ip(ip_address)
  ip = IPAddr.new(ip_address.to_s)
  tracking_prefix = ip.ipv4? ? 24 : 64
  tracking_cidr = "#{ip.mask(tracking_prefix)}/#{tracking_prefix}"

  find_or_create_by(network: tracking_cidr) do |new_range|
    new_range.source = 'auto_generated'
    new_range.creation_reason = 'IPAPI tracking network'
  end
end
|
|
|
|
# Whether IPAPI data should be fetched for the given IP address, using its
# tracking network (/24 or /64) as the bookkeeping unit.
#
# NOTE: this finds OR CREATES the tracking network as a side effect.
#
# Decision order:
#   1. false when a fetch is already in progress for the tracking network;
#   2. true when the tracking network was never queried;
#   3. true when the CIDR returned by the last query does not cover this IP
#      (IPAPI may return something more specific than the tracking network);
#   4. otherwise true only when the last query is more than a year old.
# Any unexpected error is logged and answered with true.
#
# @param ip_address [String, #to_s]
# @return [Boolean]
def self.should_fetch_ipapi_for_ip?(ip_address)
  tracker = find_or_create_tracking_network_for_ip(ip_address)

  # Prevents duplicate jobs
  return false if tracker.is_fetching_api_data?(:ipapi)

  last_queried = tracker.network_data&.dig('ipapi_queried_at')
  return true if last_queried.nil?

  covered_cidr = tracker.network_data&.dig('ipapi_returned_cidr')
  if covered_cidr.present?
    begin
      covered_range = IPAddr.new(covered_cidr)
      return true unless covered_range.include?(IPAddr.new(ip_address.to_s))
    rescue IPAddr::InvalidAddressError
      Rails.logger.warn "Invalid CIDR stored in ipapi_returned_cidr: #{covered_cidr}"
    end
  end

  # Re-query after 1 year
  Time.at(last_queried) < 1.year.ago
rescue => e
  Rails.logger.error "Error checking IPAPI fetch status for #{ip_address}: #{e.message}"
  true # Default to fetching on error
end
|
|
|
|
# Records that IPAPI was queried for this tracking network and saves.
# Stores the current time (integer seconds) under 'ipapi_queried_at' and the
# CIDR that IPAPI returned under 'ipapi_returned_cidr'.
#
# @param returned_cidr [String] The CIDR that IPAPI actually returned (may be more specific than /24)
def mark_ipapi_queried!(returned_cidr)
  self.network_data ||= {}
  query_marker = {
    'ipapi_queried_at' => Time.current.to_i,
    'ipapi_returned_cidr' => returned_cidr
  }
  query_marker.each { |key, value| self.network_data[key] = value }
  save!
end
|
|
|
|
# String representations
|
|
def to_s
|
|
cidr
|
|
end
|
|
|
|
def to_param
|
|
cidr.to_s.gsub('/', '_')
|
|
end
|
|
|
|
# Whether any events exist for this network (or its child ranges).
# Unsaved networks always report false. DuckDB is tried first; when it yields
# no positive count (or fails), the PostgreSQL `events` query decides.
#
# @return [Boolean]
def has_events?
  return false unless persisted?

  # Include child network ranges to capture all traffic within this network block
  network_ids = [id] + child_ranges.pluck(:id)
  duckdb_count = with_duckdb_fallback { EventDdb.network_event_count(network_ids) }
  return true if duckdb_count&.positive?

  events.exists?
end
|
|
|
|
# Events whose ip_address lies within this network's CIDR.
# This explicit method takes the place of the reader generated by
# `has_many :events`.
#
# @return [ActiveRecord::Relation]
def events
  within_network = "ip_address <<= ?"
  Event.where(within_network, cidr)
end
|
|
|
|
# The `recent` events of this network, capped at `limit` rows.
#
# @param limit [Integer] maximum number of events (default 100)
# @return [ActiveRecord::Relation]
def recent_events(limit: 100)
  newest_first = events.recent
  newest_first.limit(limit)
end
|
|
|
|
# Enabled rules on this network whose waf_action is :deny.
#
# @return [ActiveRecord::Relation]
def blocking_rules
  deny_filter = { waf_action: :deny, enabled: true }
  rules.where(deny_filter)
end
|
|
|
|
# Enabled rules on this network that have no expiry or expire in the future.
#
# @return [ActiveRecord::Relation]
def active_rules
  enabled_rules = rules.enabled
  enabled_rules.where("expires_at IS NULL OR expires_at > ?", Time.current)
end
|
|
|
|
# Find all network ranges that are strictly contained by this network and have
# enabled rules. Used when creating a supernet rule to identify redundant
# child rules.
#
# The full CIDR is bound instead of `network.to_s`: the latter has no prefix,
# so PostgreSQL treated it as a single host and no range could ever be
# strictly contained in it. The column is qualified with its table name so the
# condition stays unambiguous after joining `rules`.
#
# @return [ActiveRecord::Relation]
def child_network_ranges_with_rules
  NetworkRange
    .where("network_ranges.network << ?::inet", cidr)
    .joins(:rules)
    .where(rules: { enabled: true })
    .distinct
end
|
|
|
|
# Enabled rules attached to networks strictly contained by this one (more
# specific networks). Used after creating a rule to expire redundant child
# rules.
#
# @return [ActiveRecord::Relation]
def child_rules
  rules_on_children = Rule.joins(:network_range)
                          .where("network_ranges.network << ?::inet", cidr)
  rules_on_children.where(enabled: true)
end
|
|
|
|
# Network ranges that strictly contain this network and have enabled rules.
# Used to check if creating a rule would be redundant.
#
# @return [ActiveRecord::Relation]
def parent_network_ranges_with_rules
  containing = NetworkRange.where("?::inet << network", cidr)
  containing.joins(:rules).where(rules: { enabled: true }).distinct
end
|
|
|
|
# Enabled rules attached to networks that strictly contain this one (less
# specific networks), most specific supernet first. Used before creating a
# rule to check if it would be redundant.
#
# @return [ActiveRecord::Relation]
def supernet_rules
  rules_on_supernets = Rule.joins(:network_range)
                           .where("?::inet << network_ranges.network", cidr)
                           .where(enabled: true)
  rules_on_supernets.order("masklen(network_ranges.network) DESC")
end
|
|
|
|
# Check if this network range needs WAF policy evaluation.
# Returns true if:
#   - it has never been evaluated, OR
#   - any WafPolicy has been updated since the last evaluation.
# Returns false when no policies exist at all.
#
# @return [Boolean]
def needs_policy_evaluation?
  return true if policies_evaluated_at.nil?

  newest_policy_change = WafPolicy.maximum(:updated_at)
  !newest_policy_change.nil? && policies_evaluated_at < newest_policy_change
end
|
|
|
|
private
|
|
|
|
# before_validation callback: defaults `source` to 'api_imported' when unset.
def set_default_source
  self.source ||= 'api_imported'
end
|
|
|
|
# Whether the last save changed any intelligence attribute (asn, company,
# country, or one of the datacenter/proxy/VPN flags).
# Guard for the currently disabled after_save callback.
#
# @return [Boolean]
def should_update_children_inheritance?
  %i[asn company country is_datacenter is_proxy is_vpn].any? do |attribute|
    saved_change_to_attribute?(attribute)
  end
end
|
|
|
|
# Walks child ranges that have no intelligence of their own and logs that they
# can now inherit from this network. Nothing is written to the children:
# `inherited_intelligence` resolves the parent data at read time.
def update_children_inheritance!
  no_own_intelligence = {
    asn: nil,
    company: nil,
    country: nil,
    is_datacenter: false,
    is_proxy: false,
    is_vpn: false
  }

  child_ranges.where(no_own_intelligence).find_each do |child|
    Rails.logger.info "Child range #{child.cidr} can now inherit from parent #{cidr}"
  end
end
|
|
|
|
# Import-related class methods
# NOTE: these are defined with `def self.`, so the `private` keyword above does
# not apply to them.

# Per-source aggregate rows: count, first created_at and last updated_at,
# ordered by source.
#
# @return [ActiveRecord::Relation]
def self.import_stats_by_source
  aggregates = [
    'COUNT(*) as count',
    'MIN(created_at) as first_import',
    'MAX(updated_at) as last_update'
  ]
  group(:source).select(:source, *aggregates).order(:source)
end
|
|
|
|
# Coverage counters for GeoLite-imported networks.
# Each entry is a separate COUNT query.
#
# @return [Hash{Symbol => Integer}]
def self.geolite_coverage_stats
  stats = {}
  stats[:total_networks] = geolite_imported.count
  stats[:asn_networks] = geolite_asn.count
  stats[:country_networks] = geolite_country.count
  stats[:with_asn_data] = geolite_imported.where.not(asn: nil).count
  stats[:with_country_data] = geolite_imported.where.not(country: nil).count
  stats[:with_proxy_data] = geolite_imported.where(is_proxy: true).count
  stats[:unique_countries] = geolite_imported.distinct.count(:country)
  stats[:unique_asns] = geolite_imported.distinct.count(:asn)
  stats[:ipv4_networks] = geolite_imported.ipv4.count
  stats[:ipv6_networks] = geolite_imported.ipv6.count
  stats
end
|
|
|
|
# Stored networks that contain (or equal) the queried IP address or network.
#
# The query is parsed once with IPAddr and bound together with its prefix, so
# a CIDR query such as "10.1.0.0/16" matches only ranges covering the whole
# /16. Previously the prefix was dropped (`ip.to_s`), which matched any range
# containing the base address, and the "try as network" rescue branch simply
# repeated the parse that had just failed.
#
# @param query [String, nil] IP address or CIDR
# @return [ActiveRecord::Relation] `none` for blank or unparseable input
def self.find_by_ip_or_network(query)
  text = query.to_s.strip
  return none if text.empty?

  parsed = IPAddr.new(text)
  where("network >>= ?::inet", "#{parsed}/#{parsed.prefix}")
rescue IPAddr::Error
  none
end
|
|
end |