First commit!

This commit is contained in:
Dan Milne
2025-11-03 17:37:28 +11:00
commit 429d41eead
141 changed files with 5890 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
# Abstract base class that the application's Active Record models inherit from.
class ApplicationRecord < ActiveRecord::Base
primary_abstract_class
end

View File

16
app/models/current.rb Normal file
View File

@@ -0,0 +1,16 @@
# frozen_string_literal: true
# Per-request attribute store (ActiveSupport::CurrentAttributes).
#
# `baffle_host` and `baffle_internal_host` fall back to environment variables
# when no value has been assigned for the current request.
class Current < ActiveSupport::CurrentAttributes
  attribute :baffle_host
  attribute :baffle_internal_host
  attribute :project
  attribute :ip

  # Host used when building public DSNs.
  #
  # Reads the value assigned through `Current.baffle_host = ...` (stored on the
  # current instance); the previous implementation read a class-level instance
  # variable that the attribute writer never sets, so assignments were ignored.
  #
  # @return [String] assigned host, else ENV["BAFFLE_HOST"], else "localhost:3000"
  def self.baffle_host
    instance.baffle_host.presence || ENV.fetch("BAFFLE_HOST", "localhost:3000")
  end

  # Host used when building internal DSNs.
  #
  # @return [String, nil] assigned host, else ENV["BAFFLE_INTERNAL_HOST"], else nil
  def self.baffle_internal_host
    instance.baffle_internal_host.presence || ENV.fetch("BAFFLE_INTERNAL_HOST", nil)
  end
end

287
app/models/event.rb Normal file
View File

@@ -0,0 +1,287 @@
# frozen_string_literal: true
class Event < ApplicationRecord
# Each event is recorded against a project.
belongs_to :project
# Normalized association for hosts (most valuable compression)
belongs_to :request_host, optional: true
# Enums for fixed value sets (declared with scopes: false, so no enum scopes are generated)
enum :waf_action, {
allow: 0, # allow/pass
deny: 1, # deny/block
redirect: 2, # redirect
challenge: 3 # challenge (future implementation)
}, default: :allow, scopes: false
enum :request_method, {
get: 0, # GET
post: 1, # POST
put: 2, # PUT
patch: 3, # PATCH
delete: 4, # DELETE
head: 5, # HEAD
options: 6 # OPTIONS
}, default: :get, scopes: false
# Serialize segment IDs as a JSON array for easy manipulation in Rails
serialize :request_segment_ids, type: Array, coder: JSON
# event_id must be present and unique; timestamp must be present
validates :event_id, presence: true, uniqueness: true
validates :timestamp, presence: true
# Query scopes.
scope :recent, -> { order(timestamp: :desc) }
scope :by_ip, ->(ip) { where(ip_address: ip) }
scope :by_user_agent, ->(user_agent) { where(user_agent: user_agent) }
scope :by_waf_action, ->(waf_action) { where(waf_action: waf_action) }
# waf_action is an enum whose only labels are allow/deny/redirect/challenge,
# so the scopes filter on those labels rather than on aliases ('block',
# 'pass') that are not part of the enum.
scope :blocked, -> { where(waf_action: :deny) }
scope :allowed, -> { where(waf_action: :allow) }
# NOTE(review): 'rate_limit' is not a waf_action enum label, so this scope
# presumably never matches a stored event — confirm whether a rate_limit
# action should be added to the enum.
scope :rate_limited, -> { where(waf_action: 'rate_limit') }
# Path prefix matching using a string range query instead of LIKE.
#
# request_segment_ids is serialized as compact JSON text such as "[1,2,3]".
# A stored path matches the prefix [1,2] when its text is exactly "[1,2]" or
# starts with "[1,2,". Every string starting with "[1,2," sorts at or after
# "[1,2," and strictly before "[1,2-" ('-' is the character that follows ','),
# so those two values bound the prefix range.
#
# The upper bound is NOT built by incrementing the last segment ID: text such
# as "[1,10]" sorts before "[1,9," byte-wise, which made the range empty for
# prefixes like [1,9].
#
# NOTE(review): assumes the column is compared byte-wise (binary collation) —
# confirm for the database in use.
scope :with_path_prefix, ->(prefix_segment_ids) {
  return none if prefix_segment_ids.blank?

  exact_str = prefix_segment_ids.to_json # "[1,2]"
  open_str = exact_str[0..-2]            # "[1,2"
  lower_str = "#{open_str},"             # "[1,2," - first possible longer path
  upper_str = "#{open_str}-"             # "[1,2-" - first string past the prefix

  # Use raw SQL to bypass serializer (it expects Array but we're comparing strings)
  where("request_segment_ids = ? OR (request_segment_ids >= ? AND request_segment_ids < ?)",
        exact_str, lower_str, upper_str)
}
# Path depth queries: filter on the element count of the stored JSON array.
scope :path_depth, ->(depth) { where("json_array_length(request_segment_ids) = ?", depth) }
scope :path_depth_greater_than, ->(depth) { where("json_array_length(request_segment_ids) > ?", depth) }
# Helper methods

# Number of segment IDs stored on this event; 0 when none are set.
def path_depth
  ids = request_segment_ids
  return 0 if ids.nil?

  ids.length || 0
end
# Rebuilds the request path from the stored segment IDs.
#
# Falls back to request_path when no segment IDs are stored. IDs without a
# matching PathSegment row are skipped.
def reconstructed_path
  ids = request_segment_ids
  return request_path if ids.blank?

  segments_by_id = PathSegment.where(id: ids).index_by(&:id)
  names = ids.map { |segment_id| segments_by_id[segment_id]&.segment }.compact
  '/' + names.join('/')
end
# Extract key fields from payload before saving (runs before each validation)
before_validation :extract_fields_from_payload
# Normalize event fields after extraction; skipped once both the host and the
# segment IDs are set (see should_normalize?)
after_validation :normalize_event_fields, if: :should_normalize?
# Creates and persists an Event from a WAF agent payload.
#
# @param event_id [String] unique identifier of the event
# @param payload [Hash] payload with string keys ("request", "response", "geo", ...)
# @param project [Project] project the event belongs to
# @return [Event] the created record
# @raise [ActiveRecord::RecordInvalid] when validation fails
def self.create_from_waf_payload!(event_id, payload, project)
  # Only hand the request_method enum a label it knows. Assigning an unknown
  # label (e.g. "connect") to an enum attribute raises ArgumentError, and a
  # non-String value would previously fail on #downcase.
  http_method = payload.dig("request", "method").to_s.downcase
  http_method = nil unless request_methods.key?(http_method)

  # Create the WAF request event
  create!(
    project: project,
    event_id: event_id,
    timestamp: parse_timestamp(payload["timestamp"]),
    payload: payload,
    # WAF-specific fields
    ip_address: payload.dig("request", "ip"),
    user_agent: payload.dig("request", "headers", "User-Agent"),
    request_method: http_method,
    request_path: payload.dig("request", "path"),
    request_url: payload.dig("request", "url"),
    request_protocol: payload.dig("request", "protocol"),
    response_status: payload.dig("response", "status_code"),
    response_time_ms: payload.dig("response", "duration_ms"),
    waf_action: normalize_action(payload["waf_action"]), # Normalize incoming action values
    rule_matched: payload["rule_matched"],
    blocked_reason: payload["blocked_reason"],
    # Server/Environment info
    server_name: payload["server_name"],
    environment: payload["environment"],
    # Geographic data
    country_code: payload.dig("geo", "country_code"),
    city: payload.dig("geo", "city"),
    # WAF agent info
    agent_version: payload.dig("agent", "version"),
    agent_name: payload.dig("agent", "name")
  )
end
# Maps an incoming action value onto a waf_action enum label.
#
# Matching ignores case and surrounding whitespace. Missing or blank values,
# and values that are not recognised, become "allow" (unrecognised values are
# also logged as a warning).
#
# @param action [String, Symbol, nil] raw action from the payload
# @return [String] one of "allow", "deny", "challenge", "redirect"
def self.normalize_action(action)
  label = action.to_s.strip.downcase
  return "allow" if label.empty?

  case label
  when "allow", "pass", "allowed"
    "allow"
  when "deny", "block", "blocked", "deny_access"
    "deny"
  when "challenge", "redirect"
    label
  else
    Rails.logger.warn "Unknown action '#{action}', defaulting to 'allow'"
    "allow"
  end
end
# Converts a payload timestamp into a Time.
#
# Strings are parsed, numbers are treated as epoch seconds (decimals
# allowed), Time values pass through, and anything else yields the current
# time. A parsing failure is logged and also yields the current time.
def self.parse_timestamp(timestamp)
  if timestamp.is_a?(String)
    Time.parse(timestamp)
  elsif timestamp.is_a?(Numeric)
    Time.at(timestamp)
  elsif timestamp.is_a?(Time)
    timestamp
  else
    Time.current
  end
rescue => e
  Rails.logger.error "Failed to parse timestamp #{timestamp}: #{e.message}"
  Time.current
end
# Request portion of the payload as a symbol-keyed hash; {} without a payload.
# headers and query default to {} when missing.
def request_details
  return {} unless payload.present?

  request = payload.dig("request") || {}
  details = %w[ip method path url protocol].to_h { |field| [field.to_sym, request[field]] }
  details[:headers] = request["headers"] || {}
  details[:query] = request["query"] || {}
  details[:body_size] = request["body_size"]
  details
end

# Response portion of the payload as a symbol-keyed hash; {} without a payload.
def response_details
  return {} unless payload.present?

  response = payload.dig("response") || {}
  fields = %w[status_code duration_ms size]
  fields.map(&:to_sym).zip(response.values_at(*fields)).to_h
end

# The payload's "geo" hash, or {} when the payload or the key is missing.
def geo_details
  payload.present? ? (payload.dig("geo") || {}) : {}
end
# The payload's "tags" value, or {} when the payload or key is missing.
def tags
payload&.dig("tags") || {}
end
# The payload's request headers, or {} when missing.
def headers
payload&.dig("request", "headers") || {}
end
# The payload's request query hash, or {} when missing.
def query_params
payload&.dig("request", "query") || {}
end
# True when the WAF action is 'block' or 'deny'.
def blocked?
  %w[block deny].include?(waf_action)
end

# True when the WAF action is 'allow' or 'pass'.
def allowed?
  %w[allow pass].include?(waf_action)
end

# True when the WAF action is 'rate_limit'.
def rate_limited?
  action = waf_action
  action == 'rate_limit'
end

# True when the WAF action is 'challenge'.
def challenged?
  action = waf_action
  action == 'challenge'
end

# True when a matched rule was recorded for this event.
def rule_matched?
  !rule_matched.blank?
end
# New path methods for normalization

# Non-blank segments of request_path split on '/'; [] when the path is blank.
def path_segments
  path = request_path
  return [] if path.blank?

  path.split('/').select(&:present?)
end

# Memoized path_segments (computed once per instance).
def path_segments_array
  @path_segments_array ||= path_segments
end
# Hostname parsed from request_url.
#
# Surrounding whitespace is ignored. Only URI parse failures are treated as
# "no hostname"; other errors are no longer silently swallowed by an inline
# `rescue nil`.
#
# @return [String, nil] hostname, or nil when the URL is blank or malformed
def request_hostname
  url = request_url.to_s.strip
  return nil if url.empty?

  URI.parse(url).hostname
rescue URI::InvalidURIError
  nil
end
private
# True while either the host reference or the segment IDs are still missing.
def should_normalize?
request_host_id.nil? || request_segment_ids.blank?
end
# Delegates to EventNormalizer; a failure is logged and does not propagate.
def normalize_event_fields
EventNormalizer.normalize_event!(self)
rescue => e
Rails.logger.error "Failed to normalize event #{id}: #{e.message}"
end
# Copies queryable fields out of the raw payload onto the record's columns.
# Does nothing when the payload is blank; otherwise every listed column is
# overwritten with the payload's value (nil when the key is absent).
def extract_fields_from_payload
return unless payload.present?
# Extract WAF-specific fields for direct querying
request_data = payload.dig("request") || {}
response_data = payload.dig("response") || {}
self.ip_address = request_data["ip"]
self.user_agent = request_data.dig("headers", "User-Agent")
self.request_path = request_data["path"]
self.request_url = request_data["url"]
self.response_status = response_data["status_code"]
self.response_time_ms = response_data["duration_ms"]
self.rule_matched = payload["rule_matched"]
self.blocked_reason = payload["blocked_reason"]
# Store original values for normalization (these will be normalized to IDs)
# NOTE(review): these instance variables are not read anywhere in this class —
# presumably consumed by EventNormalizer; confirm.
@raw_request_method = request_data["method"]
@raw_request_protocol = request_data["protocol"]
@raw_action = payload["waf_action"]
# Extract server/environment info
self.server_name = payload["server_name"]
self.environment = payload["environment"]
# Extract geographic data
geo_data = payload.dig("geo") || {}
self.country_code = geo_data["country_code"]
self.city = geo_data["city"]
# Extract agent info
agent_data = payload.dig("agent") || {}
self.agent_version = agent_data["version"]
self.agent_name = agent_data["name"]
end
end

97
app/models/issue.rb Normal file
View File

@@ -0,0 +1,97 @@
# frozen_string_literal: true
class Issue < ApplicationRecord
belongs_to :project
# Destroying an issue nullifies the reference on its events instead of deleting them.
has_many :events, dependent: :nullify
# Workflow state, stored as an integer.
enum :status, { open: 0, resolved: 1, ignored: 2 }
validates :title, presence: true
scope :recent, -> { order(last_seen: :desc) }
scope :by_frequency, -> { order(count: :desc) }
# Callbacks for email notifications
after_create :notify_new_issue, if: :should_notify_new_issue?
after_update :notify_issue_reopened, if: :was_reopened?
# Real-time updates
after_create_commit do
broadcast_refresh_to(project)
end
after_update_commit do
broadcast_refresh # Refreshes the issue show page
broadcast_refresh_to(project, "issues") # Refreshes the project's issues index
end
# Finds the issue matching the payload's fingerprint within the project, or
# creates it as an open issue with title, exception type and first/last seen
# timestamps taken at creation time.
def self.group_event(event_payload, project)
  lookup = { project: project, fingerprint: generate_fingerprint(event_payload) }

  find_or_create_by(lookup) do |new_issue|
    new_issue.title = extract_title(event_payload)
    new_issue.exception_type = extract_exception_type(event_payload)
    new_issue.first_seen = Time.current
    new_issue.last_seen = Time.current
    new_issue.status = :open
  end
end
# Builds the grouping fingerprint for an event payload.
#
# Order of preference:
# 1. the payload's own "fingerprint" (array parts joined with "::"; a single
#    String is accepted as a one-element fingerprint instead of raising),
# 2. "<exception type>::<file>::<line>" taken from the last stack frame,
# 3. an MD5 digest of the message (or of "Unknown Error") when there is no
#    exception type.
#
# @param payload [Hash] event payload with string keys
# @return [String]
def self.generate_fingerprint(payload)
  # Use Sentry's fingerprint if provided
  custom = payload["fingerprint"]
  return Array(custom).join("::") if custom.present?

  type = payload.dig("exception", "values", 0, "type")

  # Fallback to message if no exception; to_s guards against non-String messages
  return Digest::MD5.hexdigest((payload["message"] || "Unknown Error").to_s) if type.blank?

  # Generate from exception type + location
  file = payload.dig("exception", "values", 0, "stacktrace", "frames", -1, "filename")
  line = payload.dig("exception", "values", 0, "stacktrace", "frames", -1, "lineno")
  "#{type}::#{file}::#{line}"
end
# Title for an issue: the first exception's value, else the message, else the
# first exception's type, else "Unknown Error".
def self.extract_title(payload)
  first_exception = ["exception", "values", 0]
  candidates = [
    payload.dig(*first_exception, "value"),
    payload["message"],
    payload.dig(*first_exception, "type")
  ]
  candidates.find { |candidate| candidate } || "Unknown Error"
end

# Type of the first exception in the payload, or nil when there is none.
def self.extract_exception_type(payload)
  payload.dig("exception", "values", 0, "type")
end
# Registers one more occurrence: bumps count by one and sets last_seen.
#
# @param timestamp [Time] when the occurrence happened (defaults to now)
def record_event!(timestamp: Time.current)
  next_count = count + 1
  update!(count: next_count, last_seen: timestamp)
end
private
# Enqueues the "new issue" email for this issue.
def notify_new_issue
IssueMailer.new_issue(self).deliver_later
end
# Enqueues the "issue reopened" email for this issue.
def notify_issue_reopened
IssueMailer.issue_reopened(self).deliver_later
end
def should_notify_new_issue?
# Only notify for new issues in production environment or if explicitly enabled
Rails.env.production? || ENV['SPLAT_EMAIL_NOTIFICATIONS'] == 'true'
end
def was_reopened?
# Check if status changed from resolved to open
saved_change_to_status?(from: 1, to: 0) # resolved=1, open=0
end
end

View File

@@ -0,0 +1,63 @@
class NetworkRange < ApplicationRecord
validates :ip_address, presence: true
# Prefix length is validated as 0..128 regardless of ip_version.
validates :network_prefix, presence: true, numericality: {greater_than_or_equal_to: 0, less_than_or_equal_to: 128}
validates :ip_version, presence: true, inclusion: {in: [4, 6]}
# Convenience methods for JSON fields
#
# Readers return {} when the column is nil, blank, or holds JSON null (an
# empty string previously raised JSON::ParserError). Writers store "{}" when
# given nil, so a later read never yields nil.

# @return [Hash] parsed abuser_scores
def abuser_scores_hash
  abuser_scores.blank? ? {} : (JSON.parse(abuser_scores) || {})
end

# @param hash [Hash, nil] value serialized to JSON and stored in abuser_scores
def abuser_scores_hash=(hash)
  self.abuser_scores = (hash || {}).to_json
end

# @return [Hash] parsed additional_data
def additional_data_hash
  additional_data.blank? ? {} : (JSON.parse(additional_data) || {})
end

# @param hash [Hash, nil] value serialized to JSON and stored in additional_data
def additional_data_hash=(hash)
  self.additional_data = (hash || {}).to_json
end
# Scope methods for common queries (simple equality filters on single columns)
scope :ipv4, -> { where(ip_version: 4) }
scope :ipv6, -> { where(ip_version: 6) }
scope :datacenter, -> { where(is_datacenter: true) }
scope :proxy, -> { where(is_proxy: true) }
scope :vpn, -> { where(is_vpn: true) }
scope :by_country, ->(country) { where(ip_api_country: country) }
scope :by_company, ->(company) { where(company: company) }
scope :by_asn, ->(asn) { where(asn: asn) }
# Find network ranges that contain a specific IP address.
#
# Loads every range of the address's IP version and filters in Ruby.
#
# @param ip_string [String] IPv4 or IPv6 address
# @return [Array<NetworkRange>] matching ranges
# @raise [IPAddr::InvalidAddressError] when ip_string is not a valid address
def self.contains_ip(ip_string)
  address = IPAddr.new(ip_string)
  ip_bytes = address.hton
  version = address.ipv6? ? 6 : 4

  # contains_ip_bytes? is private, so calling it with an explicit receiver
  # raised NoMethodError; dispatch through send instead.
  where(ip_version: version).select do |range|
    range.send(:contains_ip_bytes?, ip_bytes)
  end
end
# True when this range contains the given IP string (delegates to
# contains_ip_bytes? with the address converted via IPAddr#hton).
def contains_ip?(ip_string)
contains_ip_bytes?(IPAddr.new(ip_string).hton)
end
# Renders the range as "<address>/<prefix>".
def to_s
"#{ip_address_to_s}/#{network_prefix}"
end
private
# True when the packed address falls inside this network.
#
# Compares the first network_prefix bits of ip_address with the same bits of
# ip_bytes. With a full-length prefix (32 for IPv4, 128 for IPv6) this is the
# exact-match check the previous placeholder performed.
#
# NOTE(review): assumes ip_address holds the packed network-order bytes of
# the range's base address (the format IPAddr#hton produces) — confirm
# against the schema.
#
# @param ip_bytes [String] packed address bytes, as returned by IPAddr#hton
# @return [Boolean]
def contains_ip_bytes?(ip_bytes)
  network = ip_address.to_s.b
  candidate = ip_bytes.to_s.b
  # Different lengths mean different address families (or missing data).
  return false if network.empty? || network.bytesize != candidate.bytesize

  prefix_bits = network_prefix.to_i.clamp(0, network.bytesize * 8)
  whole_bytes, extra_bits = prefix_bits.divmod(8)
  return false unless network.byteslice(0, whole_bytes) == candidate.byteslice(0, whole_bytes)
  return true if extra_bits.zero?

  # Compare only the leading extra_bits of the first partially covered byte.
  mask = (0xFF << (8 - extra_bits)) & 0xFF
  (network.getbyte(whole_bytes) & mask) == (candidate.getbyte(whole_bytes) & mask)
end
# Printable form of the stored address, produced by IPAddr.ntop.
def ip_address_to_s
# Convert binary IP back to string representation
IPAddr.ntop(ip_address)
end
end

View File

@@ -0,0 +1,17 @@
# A single, unique URL path segment together with how often it has been used.
class PathSegment < ApplicationRecord
  validates :segment, presence: true, uniqueness: true
  validates :usage_count, presence: true, numericality: { greater_than: 0 }

  class << self
    # Returns the row for +segment+, creating it with a usage count of 1 and
    # first_seen_at set to now when it does not exist yet.
    def find_or_create_segment(segment)
      find_or_create_by(segment: segment) do |record|
        record.usage_count = 1
        record.first_seen_at = Time.current
      end
    end
  end

  # Adds one to usage_count and saves the record.
  def increment_usage!
    increment!(:usage_count)
  end
end

211
app/models/project.rb Normal file
View File

@@ -0,0 +1,211 @@
# frozen_string_literal: true
class Project < ApplicationRecord
# Destroying a project destroys its events.
has_many :events, dependent: :destroy
validates :name, presence: true
validates :slug, presence: true, uniqueness: true
validates :public_key, presence: true, uniqueness: true
scope :by_slug, ->(slug) { where(slug: slug) }
scope :by_public_key, ->(key) { where(public_key: key) }
scope :enabled, -> { where(enabled: true) }
# The slug is regenerated from the name on every validation while a name is set;
# the key and default settings are only filled in when blank.
before_validation :generate_slug, if: :name?
before_validation :generate_public_key, if: -> { public_key.blank? }
before_validation :set_default_settings, if: -> { settings.blank? }
def broadcast_events_refresh
# Broadcast to the events stream for this project
broadcast_refresh_to(self, "events")
end
def broadcast_rules_refresh
# Broadcast to the rules stream for this project (for future rule management UI)
broadcast_refresh_to(self, "rules")
end
# Looks a project up by the public key embedded in a DSN.
#
# Expected shape: http(s)://public_key@host/project_id. A legacy
# "public_key:secret@" user-info part is also accepted; only the key is used.
# The pattern is anchored and the key may not contain ':', '@' or '/', so a
# stray '@' later in the URL can no longer be mistaken for the key separator.
#
# @param dsn [String, nil]
# @return [Project, nil] nil when the DSN is blank, malformed, or unknown
def self.find_by_dsn(dsn)
  match = dsn.to_s.strip.match(%r{\Ahttps?://([^:@/]+)(?::[^@/]*)?@})
  return nil unless match

  find_by(public_key: match[1])
end
# Resolves a project from a URL identifier.
#
# Tries the slug first (nicer URLs), then falls back to the numeric ID. The
# ID lookup only happens when the identifier consists solely of digits:
# String#to_i would otherwise turn "12abc" into 12 and match an unrelated
# project.
#
# @param project_id [String, Integer]
# @return [Project, nil]
def self.find_by_project_id(project_id)
  identifier = project_id.to_s
  by_slug = find_by(slug: identifier)
  return by_slug if by_slug
  return nil unless identifier.match?(/\A\d+\z/)

  find_by(id: identifier.to_i)
end
# Public DSN for this project: "<scheme>://<public_key>@<host>/<slug>".
# The scheme is http when the host contains "localhost", https otherwise.
def dsn
  host = Current.baffle_host || "localhost:3000"
  scheme = host.include?("localhost") ? "http" : "https"
  format("%s://%s@%s/%s", scheme, public_key, host, slug)
end

# DSN for internal connections, or nil when no internal host is configured.
def internal_dsn
  return nil unless Current.baffle_internal_host.present?

  internal_host = Current.baffle_internal_host
  # Internal connections use HTTP
  format("%s://%s@%s/%s", "http", public_key, internal_host, slug)
end
# WAF Analytics Methods

# The most recent events, newest first, capped at +limit+.
def recent_events(limit: 100)
events.recent.limit(limit)
end
# The most recent events from the blocked scope, capped at +limit+.
def recent_blocked_events(limit: 100)
events.blocked.recent.limit(limit)
end
# The most recent events from the rate_limited scope, capped at +limit+.
def recent_rate_limited_events(limit: 100)
events.rate_limited.recent.limit(limit)
end
# IP addresses with the most blocked events inside a time window.
#
# @param limit [Integer] maximum number of rows returned
# @param time_range [Range, Time] a Range is used as given; a single Time
#   (such as the default) means "from that moment onwards". Previously a
#   single Time was compared for equality, which matched essentially nothing.
# @return [ActiveRecord::Relation] rows exposing ip_address and count
def top_blocked_ips(limit: 10, time_range: 1.hour.ago)
  window = time_range.is_a?(Range) ? time_range : (time_range..)

  events.blocked
        .where(timestamp: window)
        .group(:ip_address)
        .select('ip_address, COUNT(*) as count')
        .order('count DESC')
        .limit(limit)
end
# Number of events, optionally restricted to a timestamp range.
def event_count(time_range = nil)
  relation = events
  relation = relation.where(timestamp: time_range) if time_range
  relation.count
end

# Number of events in the blocked scope, optionally restricted to a timestamp range.
def blocked_count(time_range = nil)
  relation = events.blocked
  relation = relation.where(timestamp: time_range) if time_range
  relation.count
end

# Number of events in the allowed scope, optionally restricted to a timestamp range.
def allowed_count(time_range = nil)
  relation = events.allowed
  relation = relation.where(timestamp: time_range) if time_range
  relation.count
end
# Helper method to parse settings safely.
#
# Always returns a Hash: a String is parsed as JSON, a Hash is returned as is,
# and anything else (nil, invalid JSON, or JSON that is not an object such as
# "null" or "[1]") yields {}. Callers use dig and [] with string keys on the
# result, which fails on nil or an Array.
#
# @return [Hash]
def parsed_settings
  value = settings
  value = JSON.parse(value) if value.is_a?(String)
  value.is_a?(Hash) ? value : {}
rescue JSON::ParserError
  {}
end
# WAF Configuration Methods

# Rate limiting counts as enabled unless the setting is explicitly false.
def rate_limit_enabled?
parsed_settings.dig('rate_limiting', 'enabled') != false
end
# Configured rate limit threshold; 100 when not set.
def rate_limit_threshold
parsed_settings.dig('rate_limiting', 'threshold') || 100
end
# True only when the setting is explicitly true.
def custom_rules_enabled?
parsed_settings.dig('custom_rules', 'enabled') == true
end
# True only when the setting is explicitly true.
def block_by_country_enabled?
parsed_settings.dig('geo_blocking', 'enabled') == true
end
# Configured list of blocked countries; [] when not set.
def blocked_countries
parsed_settings.dig('geo_blocking', 'blocked_countries') || []
end
# True only when the setting is explicitly true.
def block_datacenters_enabled?
parsed_settings.dig('datacenter_blocking', 'enabled') == true
end
# WAF Rule Management

# Stores (or replaces) a rule for one IP address inside settings['ip_rules'].
#
# This will integrate with the IP rules storage system; for now rules live in
# settings as a temporary solution. The rule is written with string keys and a
# string action so it reads the same before and after a JSON round trip —
# blocked_ips looks rules up with string keys.
#
# @param ip_address [String] address the rule applies to
# @param action [String, Symbol] action to take for this address
# @param expires_at [Time, nil] optional expiry, stored as ISO 8601
# @param reason [String, nil] optional free-text reason
# @return [Boolean] result of update
def add_ip_rule(ip_address, action, expires_at: nil, reason: nil)
  current_settings = parsed_settings
  rule = {
    'action' => action.to_s,
    'expires_at' => expires_at&.iso8601,
    'reason' => reason,
    'created_at' => Time.current.iso8601
  }
  ip_rules = (current_settings['ip_rules'] || {}).merge(ip_address => rule)

  update(settings: current_settings.merge('ip_rules' => ip_rules))
end
# Drops the rule stored for +ip_address+ (if any) and saves the settings.
def remove_ip_rule(ip_address)
  current_settings = parsed_settings
  remaining_rules = (current_settings['ip_rules'] || {}).reject { |ip, _rule| ip.eql?(ip_address) }
  update(settings: current_settings.merge('ip_rules' => remaining_rules))
end
# IP addresses whose stored rule blocks traffic.
#
# Accepts both 'block' and 'deny' as blocking actions (Event treats the two
# as equivalent) and reads the action under either a string or a symbol key,
# so rules written with symbol keys are found before a JSON round trip too.
#
# @return [Array<String>]
def blocked_ips
  ip_rules = parsed_settings['ip_rules'] || {}
  ip_rules.select do |_ip, rule|
    action = rule.is_a?(Hash) ? (rule['action'] || rule[:action]) : nil
    %w[block deny].include?(action.to_s)
  end.keys
end
# 'disabled' when the project is not enabled, 'active' when at least one event
# has a timestamp within the last hour, otherwise 'idle'.
def waf_status
return 'disabled' unless enabled?
return 'active' if events.where(timestamp: 1.hour.ago..).exists?
'idle'
end
private
# Derives the slug from the name.
def generate_slug
self.slug = name&.parameterize&.downcase
end
def generate_public_key
# Generate a random 32-character hex string for WAF authentication
self.public_key = SecureRandom.hex(16)
end
# Assigns the default settings hash (string keys throughout).
def set_default_settings
self.settings = {
'rate_limiting' => {
'enabled' => true,
'threshold' => 100, # requests per minute
'window' => 60 # seconds
},
'geo_blocking' => {
'enabled' => false,
'blocked_countries' => []
},
'datacenter_blocking' => {
'enabled' => false,
'allow_known_datacenters' => true
},
'custom_rules' => {
'enabled' => false,
'rules' => []
},
'ip_rules' => {},
'challenge' => {
'enabled' => true,
'provider' => 'recaptcha'
}
}
end
end

View File

@@ -0,0 +1,19 @@
# A unique request hostname together with how often it has been used.
class RequestHost < ApplicationRecord
  has_many :events, dependent: :nullify

  validates :hostname, presence: true, uniqueness: true
  validates :usage_count, presence: true, numericality: { greater_than: 0 }

  class << self
    # Returns the row for +hostname+, creating it with a usage count of 1 and
    # first_seen_at set to now when it does not exist yet.
    def find_or_create_host(hostname)
      find_or_create_by(hostname: hostname) do |record|
        record.usage_count = 1
        record.first_seen_at = Time.current
      end
    end
  end

  # Adds one to usage_count and saves the record.
  def increment_usage!
    increment!(:usage_count)
  end
end

126
app/models/rule.rb Normal file
View File

@@ -0,0 +1,126 @@
# frozen_string_literal: true
class Rule < ApplicationRecord
belongs_to :rule_set
# rule_type and action are restricted to the lists defined on RuleSet.
validates :rule_type, presence: true, inclusion: { in: RuleSet::RULE_TYPES }
validates :target, presence: true
validates :action, presence: true, inclusion: { in: RuleSet::ACTIONS }
validates :priority, presence: true, numericality: { greater_than: 0 }
scope :enabled, -> { where(enabled: true) }
# Highest priority first; newer rules first among equal priorities.
scope :by_priority, -> { order(priority: :desc, created_at: :desc) }
scope :expired, -> { where("expires_at < ?", Time.current) }
# Rules without an expiry never expire.
scope :not_expired, -> { where("expires_at IS NULL OR expires_at > ?", Time.current) }
# Check if rule is currently active: enabled and either without an expiry or
# with an expiry that is still in the future.
def active?
  switched_on = enabled?
  return switched_on unless switched_on

  expiry = expires_at
  expiry.nil? || expiry > Time.current
end
# Check if rule matches given request context.
#
# Inactive rules never match. The rule_type selects the matcher; a type
# without a matcher does not match.
def matches?(context)
  return false unless active?

  matcher = {
    'ip' => :match_ip_rule?,
    'cidr' => :match_cidr_rule?,
    'path' => :match_path_rule?,
    'user_agent' => :match_user_agent_rule?,
    'parameter' => :match_parameter_rule?,
    'method' => :match_method_rule?,
    'country' => :match_country_rule?
  }[rule_type]

  matcher ? send(matcher, context) : false
end
# Hash representation of the rule, including whether it is currently active.
# conditions defaults to {} when not set.
def to_waf_format
{
id: id,
type: rule_type,
target: target,
action: action,
conditions: conditions || {},
priority: priority,
expires_at: expires_at,
active: active?
}
end
private
# Matches when the context's IP address equals the target exactly.
def match_ip_rule?(context)
  client_ip = context[:ip_address]
  return false unless client_ip

  target == client_ip
end

# Matches when the context's IP address lies inside the target CIDR range.
# An invalid address on either side counts as no match.
def match_cidr_rule?(context)
  return false unless context[:ip_address]

  IPAddr.new(target).include?(context[:ip_address])
rescue IPAddr::InvalidAddressError
  false
end
# Matches the request path against the target.
#
# With conditions['regex'] == true the target is compiled as a regular
# expression; otherwise the path must start with the target (prefix match).
# A target that is not a valid regular expression counts as no match instead
# of raising while a request is being evaluated.
#
# @param context [Hash] request context; reads :request_path
# @return [Boolean]
def match_path_rule?(context)
  path = context[:request_path]
  return false unless path

  if conditions&.dig('regex') == true
    Regexp.new(target).match?(path)
  else
    path.start_with?(target)
  end
rescue RegexpError
  false
end
# Matches the user agent against the target, ignoring case.
#
# With conditions['regex'] == true the target is compiled as a
# case-insensitive regular expression; otherwise the user agent must contain
# the target as a substring. A target that is not a valid regular expression
# counts as no match instead of raising.
#
# @param context [Hash] request context; reads :user_agent
# @return [Boolean]
def match_user_agent_rule?(context)
  user_agent = context[:user_agent]
  return false unless user_agent

  if conditions&.dig('regex') == true
    Regexp.new(target, Regexp::IGNORECASE).match?(user_agent)
  else
    user_agent.downcase.include?(target.downcase)
  end
rescue RegexpError
  false
end
# Matches one query parameter's value against the target, ignoring case.
#
# The parameter is looked up by conditions['parameter_name'], falling back to
# the target itself as the name. With conditions['regex'] == true the target
# is compiled as a case-insensitive regular expression; otherwise the value
# must contain the target as a substring. A target that is not a valid
# regular expression counts as no match instead of raising.
#
# @param context [Hash] request context; reads :query_params
# @return [Boolean]
def match_parameter_rule?(context)
  query_params = context[:query_params]
  return false unless query_params

  param_name = conditions&.dig('parameter_name') || target
  param_value = query_params[param_name]
  return false unless param_value

  # Check if parameter value matches pattern
  value = param_value.to_s
  if conditions&.dig('regex') == true
    Regexp.new(target, Regexp::IGNORECASE).match?(value)
  else
    value.downcase.include?(target.downcase)
  end
rescue RegexpError
  false
end
# Matches when the request method equals the target, compared upper-cased.
def match_method_rule?(context)
  verb = context[:request_method]
  verb ? target.upcase == verb.upcase : false
end

# Matches when the country code equals the target, compared upper-cased.
def match_country_rule?(context)
  code = context[:country_code]
  code ? target.upcase == code.upcase : false
end
end

108
app/models/rule_set.rb Normal file
View File

@@ -0,0 +1,108 @@
# frozen_string_literal: true
class RuleSet < ApplicationRecord
# Destroying a rule set destroys its rules.
has_many :rules, dependent: :destroy
validates :name, presence: true, uniqueness: true
validates :slug, presence: true, uniqueness: true
scope :enabled, -> { where(enabled: true) }
# Highest priority first; newer rule sets first among equal priorities.
scope :by_priority, -> { order(priority: :desc, created_at: :desc) }
before_validation :generate_slug, if: :name?
before_validation :set_default_values
# Rule Types
RULE_TYPES = %w[ip cidr path user_agent parameter method rate_limit country].freeze
# Actions a rule may take
ACTIONS = %w[allow deny challenge rate_limit].freeze
# Enabled rules of this set as plain hashes, ordered by priority.
# Returns [] when the rule set itself is disabled.
def to_waf_rules
  return [] unless enabled?

  # Output key => rule reader
  readers = {
    id: :id,
    type: :rule_type,
    target: :target,
    action: :action,
    conditions: :conditions,
    priority: :priority,
    expires_at: :expires_at
  }

  rules.enabled.by_priority.map do |rule|
    readers.transform_values { |reader| rule.public_send(reader) }
  end
end
# Creates a rule in this set; raises when the rule is invalid (create!).
def add_rule(rule_type, target, action, conditions: {}, expires_at: nil, priority: 100)
rules.create!(
rule_type: rule_type,
target: target,
action: action,
conditions: conditions,
expires_at: expires_at,
priority: priority
)
end
# Destroys the rule with the given ID; the lookup is limited to this set's rules.
def remove_rule(rule_id)
rules.find(rule_id).destroy
end
# Adds a 'deny' rule for one IP address at priority 1000.
# NOTE(review): the reason argument is accepted but not stored anywhere.
def block_ip(ip_address, expires_at: nil, reason: nil)
add_rule('ip', ip_address, 'deny', expires_at: expires_at, priority: 1000)
end
# Adds an 'allow' rule for one IP address at priority 1000.
def allow_ip(ip_address, expires_at: nil)
add_rule('ip', ip_address, 'allow', expires_at: expires_at, priority: 1000)
end
# Adds a 'deny' rule for a CIDR range at priority 900.
# NOTE(review): the reason argument is accepted but not stored anywhere.
def block_cidr(cidr, expires_at: nil, reason: nil)
add_rule('cidr', cidr, 'deny', expires_at: expires_at, priority: 900)
end
# Adds a 'deny' rule for a path at priority 500.
def block_path(path, conditions: {}, expires_at: nil)
add_rule('path', path, 'deny', conditions: conditions, expires_at: expires_at, priority: 500)
end
# Adds a 'deny' rule for a user agent pattern at priority 600.
def block_user_agent(user_agent_pattern, expires_at: nil)
add_rule('user_agent', user_agent_pattern, 'deny', expires_at: expires_at, priority: 600)
end
# Logs the push and asks every subscribed project to broadcast a rules refresh.
def push_to_agents!
# This would integrate with the agent distribution system
Rails.logger.info "Pushing rule set '#{name}' with #{rules.count} rules to agents"
# Broadcast update to connected projects
projects = Project.where(id: projects_subscription || [])
projects.each(&:broadcast_rules_refresh)
end
# Enabled projects subscribed to this rule set; an empty relation when there
# are no subscriptions.
def active_projects
return Project.none unless projects_subscription.present?
Project.where(id: projects_subscription).enabled
end
# Adds the project's ID to projects_subscription (without duplicates) and saves.
def subscribe_project(project)
  current_ids = projects_subscription || []
  update(projects_subscription: current_ids | [project.id])
end

# Removes the project's ID from projects_subscription and saves.
def unsubscribe_project(project)
  current_ids = projects_subscription || []
  update(projects_subscription: current_ids - [project.id])
end
private
# Derives the slug from the name.
def generate_slug
self.slug = name&.parameterize&.downcase
end
# Fills in defaults for attributes that are still nil: enabled (true),
# priority (100) and projects_subscription ([]).
def set_default_values
self.enabled = true if enabled.nil?
self.priority = 100 if priority.nil?
self.projects_subscription = [] if projects_subscription.nil?
end
end