Compare commits
10 Commits
path-match
...
3eddfe9f7e
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3eddfe9f7e | ||
|
|
179563022e | ||
|
|
de2eb43e2b | ||
|
|
3f274c842c | ||
|
|
ef56779584 | ||
|
|
de1cf0b237 | ||
|
|
4964d1a190 | ||
|
|
5d3e35a4ac | ||
|
|
830810305b | ||
| 093ee71c9f |
19
Dockerfile
19
Dockerfile
@@ -14,10 +14,25 @@ FROM docker.io/library/ruby:$RUBY_VERSION-slim AS base
|
|||||||
# Rails app lives here
|
# Rails app lives here
|
||||||
WORKDIR /rails
|
WORKDIR /rails
|
||||||
|
|
||||||
# Install base packages
|
# Install base packages and DuckDB library
|
||||||
|
ARG TARGETPLATFORM
|
||||||
RUN apt-get update -qq && \
|
RUN apt-get update -qq && \
|
||||||
apt-get install --no-install-recommends -y curl libjemalloc2 libvips sqlite3 && \
|
apt-get install --no-install-recommends -y curl libjemalloc2 libvips sqlite3 wget unzip && \
|
||||||
ln -s /usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2 /usr/local/lib/libjemalloc.so && \
|
ln -s /usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2 /usr/local/lib/libjemalloc.so && \
|
||||||
|
case "$TARGETPLATFORM" in \
|
||||||
|
"linux/amd64") \
|
||||||
|
DUCKDB_ARCH="amd64" ;; \
|
||||||
|
"linux/arm64") \
|
||||||
|
DUCKDB_ARCH="arm64" ;; \
|
||||||
|
*) \
|
||||||
|
echo "Unsupported platform: $TARGETPLATFORM" && exit 1 ;; \
|
||||||
|
esac && \
|
||||||
|
wget "https://github.com/duckdb/duckdb/releases/download/v1.4.2/libduckdb-linux-${DUCKDB_ARCH}.zip" -O /tmp/libduckdb.zip && \
|
||||||
|
unzip /tmp/libduckdb.zip -d /tmp/duckdb && \
|
||||||
|
cp /tmp/duckdb/duckdb.h /tmp/duckdb/duckdb.hpp /usr/local/include/ && \
|
||||||
|
cp /tmp/duckdb/libduckdb.so /usr/local/lib/ && \
|
||||||
|
ldconfig && \
|
||||||
|
rm -rf /tmp/libduckdb.zip /tmp/duckdb && \
|
||||||
rm -rf /var/lib/apt/lists /var/cache/apt/archives
|
rm -rf /var/lib/apt/lists /var/cache/apt/archives
|
||||||
|
|
||||||
# Set production environment variables and enable jemalloc for reduced memory usage and latency.
|
# Set production environment variables and enable jemalloc for reduced memory usage and latency.
|
||||||
|
|||||||
2
Gemfile
2
Gemfile
@@ -94,3 +94,5 @@ end
|
|||||||
gem "sentry-rails", "~> 6.1"
|
gem "sentry-rails", "~> 6.1"
|
||||||
|
|
||||||
gem "postgresql_cursor", "~> 0.6.9"
|
gem "postgresql_cursor", "~> 0.6.9"
|
||||||
|
|
||||||
|
gem "duckdb", "~> 1.4"
|
||||||
|
|||||||
@@ -116,6 +116,8 @@ GEM
|
|||||||
device_detector (1.1.3)
|
device_detector (1.1.3)
|
||||||
dotenv (3.1.8)
|
dotenv (3.1.8)
|
||||||
drb (2.2.3)
|
drb (2.2.3)
|
||||||
|
duckdb (1.4.1.1)
|
||||||
|
bigdecimal (>= 3.1.4)
|
||||||
ed25519 (1.4.0)
|
ed25519 (1.4.0)
|
||||||
email_validator (2.2.4)
|
email_validator (2.2.4)
|
||||||
activemodel
|
activemodel
|
||||||
@@ -488,6 +490,7 @@ DEPENDENCIES
|
|||||||
countries
|
countries
|
||||||
debug
|
debug
|
||||||
device_detector
|
device_detector
|
||||||
|
duckdb (~> 1.4)
|
||||||
httparty
|
httparty
|
||||||
image_processing (~> 1.2)
|
image_processing (~> 1.2)
|
||||||
importmap-rails
|
importmap-rails
|
||||||
|
|||||||
412
app/assets/stylesheets/tom-select.css
Normal file
412
app/assets/stylesheets/tom-select.css
Normal file
@@ -0,0 +1,412 @@
|
|||||||
|
/**
|
||||||
|
* tom-select.css (v2.3.1)
|
||||||
|
* Copyright (c) contributors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
|
||||||
|
* file except in compliance with the License. You may obtain a copy of the License at:
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software distributed under
|
||||||
|
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
|
||||||
|
* ANY KIND, either express or implied. See the License for the specific language
|
||||||
|
* governing permissions and limitations under the License.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
.ts-control {
|
||||||
|
border: 1px solid #d0d0d0;
|
||||||
|
padding: 8px 8px;
|
||||||
|
width: 100%;
|
||||||
|
overflow: hidden;
|
||||||
|
position: relative;
|
||||||
|
z-index: 1;
|
||||||
|
box-sizing: border-box;
|
||||||
|
box-shadow: none;
|
||||||
|
border-radius: 3px;
|
||||||
|
display: flex;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
}
|
||||||
|
.ts-wrapper.multi.has-items .ts-control {
|
||||||
|
padding: calc(8px - 2px - 0) 8px calc(8px - 2px - 3px - 0);
|
||||||
|
}
|
||||||
|
.full .ts-control {
|
||||||
|
background-color: #fff;
|
||||||
|
}
|
||||||
|
.disabled .ts-control, .disabled .ts-control * {
|
||||||
|
cursor: default !important;
|
||||||
|
}
|
||||||
|
.focus .ts-control {
|
||||||
|
box-shadow: none;
|
||||||
|
}
|
||||||
|
.ts-control > * {
|
||||||
|
vertical-align: baseline;
|
||||||
|
display: inline-block;
|
||||||
|
}
|
||||||
|
.ts-wrapper.multi .ts-control > div {
|
||||||
|
cursor: pointer;
|
||||||
|
margin: 0 3px 3px 0;
|
||||||
|
padding: 2px 6px;
|
||||||
|
background: #f2f2f2;
|
||||||
|
color: #303030;
|
||||||
|
border: 0 solid #d0d0d0;
|
||||||
|
}
|
||||||
|
.ts-wrapper.multi .ts-control > div.active {
|
||||||
|
background: #e8e8e8;
|
||||||
|
color: #303030;
|
||||||
|
border: 0 solid #cacaca;
|
||||||
|
}
|
||||||
|
.ts-wrapper.multi.disabled .ts-control > div, .ts-wrapper.multi.disabled .ts-control > div.active {
|
||||||
|
color: #7d7d7d;
|
||||||
|
background: white;
|
||||||
|
border: 0 solid white;
|
||||||
|
}
|
||||||
|
.ts-control > input {
|
||||||
|
flex: 1 1 auto;
|
||||||
|
min-width: 7rem;
|
||||||
|
display: inline-block !important;
|
||||||
|
padding: 0 !important;
|
||||||
|
min-height: 0 !important;
|
||||||
|
max-height: none !important;
|
||||||
|
max-width: 100% !important;
|
||||||
|
margin: 0 !important;
|
||||||
|
text-indent: 0 !important;
|
||||||
|
border: 0 none !important;
|
||||||
|
background: none !important;
|
||||||
|
line-height: inherit !important;
|
||||||
|
-webkit-user-select: auto !important;
|
||||||
|
-moz-user-select: auto !important;
|
||||||
|
-ms-user-select: auto !important;
|
||||||
|
user-select: auto !important;
|
||||||
|
box-shadow: none !important;
|
||||||
|
}
|
||||||
|
.ts-control > input::-ms-clear {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
.ts-control > input:focus {
|
||||||
|
outline: none !important;
|
||||||
|
}
|
||||||
|
.has-items .ts-control > input {
|
||||||
|
margin: 0 4px !important;
|
||||||
|
}
|
||||||
|
.ts-control.rtl {
|
||||||
|
text-align: right;
|
||||||
|
}
|
||||||
|
.ts-control.rtl.single .ts-control:after {
|
||||||
|
left: 15px;
|
||||||
|
right: auto;
|
||||||
|
}
|
||||||
|
.ts-control.rtl .ts-control > input {
|
||||||
|
margin: 0 4px 0 -2px !important;
|
||||||
|
}
|
||||||
|
.disabled .ts-control {
|
||||||
|
opacity: 0.5;
|
||||||
|
background-color: #fafafa;
|
||||||
|
}
|
||||||
|
.input-hidden .ts-control > input {
|
||||||
|
opacity: 0;
|
||||||
|
position: absolute;
|
||||||
|
left: -10000px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ts-dropdown {
|
||||||
|
position: absolute;
|
||||||
|
top: 100%;
|
||||||
|
left: 0;
|
||||||
|
width: 100%;
|
||||||
|
z-index: 10;
|
||||||
|
border: 1px solid #d0d0d0;
|
||||||
|
background: #fff;
|
||||||
|
margin: 0.25rem 0 0;
|
||||||
|
border-top: 0 none;
|
||||||
|
box-sizing: border-box;
|
||||||
|
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
|
||||||
|
border-radius: 0 0 3px 3px;
|
||||||
|
}
|
||||||
|
.ts-dropdown [data-selectable] {
|
||||||
|
cursor: pointer;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
.ts-dropdown [data-selectable] .highlight {
|
||||||
|
background: rgba(125, 168, 208, 0.2);
|
||||||
|
border-radius: 1px;
|
||||||
|
}
|
||||||
|
.ts-dropdown .option,
|
||||||
|
.ts-dropdown .optgroup-header,
|
||||||
|
.ts-dropdown .no-results,
|
||||||
|
.ts-dropdown .create {
|
||||||
|
padding: 5px 8px;
|
||||||
|
}
|
||||||
|
.ts-dropdown .option, .ts-dropdown [data-disabled], .ts-dropdown [data-disabled] [data-selectable].option {
|
||||||
|
cursor: inherit;
|
||||||
|
opacity: 0.5;
|
||||||
|
}
|
||||||
|
.ts-dropdown [data-selectable].option {
|
||||||
|
opacity: 1;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
.ts-dropdown .optgroup:first-child .optgroup-header {
|
||||||
|
border-top: 0 none;
|
||||||
|
}
|
||||||
|
.ts-dropdown .optgroup-header {
|
||||||
|
color: #303030;
|
||||||
|
background: #fff;
|
||||||
|
cursor: default;
|
||||||
|
}
|
||||||
|
.ts-dropdown .active {
|
||||||
|
background-color: #f5fafd;
|
||||||
|
color: #495c68;
|
||||||
|
}
|
||||||
|
.ts-dropdown .active.create {
|
||||||
|
color: #495c68;
|
||||||
|
}
|
||||||
|
.ts-dropdown .create {
|
||||||
|
color: rgba(48, 48, 48, 0.5);
|
||||||
|
}
|
||||||
|
.ts-dropdown .spinner {
|
||||||
|
display: inline-block;
|
||||||
|
width: 30px;
|
||||||
|
height: 30px;
|
||||||
|
margin: 5px 8px;
|
||||||
|
}
|
||||||
|
.ts-dropdown .spinner::after {
|
||||||
|
content: " ";
|
||||||
|
display: block;
|
||||||
|
width: 24px;
|
||||||
|
height: 24px;
|
||||||
|
margin: 3px;
|
||||||
|
border-radius: 50%;
|
||||||
|
border: 5px solid #d0d0d0;
|
||||||
|
border-color: #d0d0d0 transparent #d0d0d0 transparent;
|
||||||
|
animation: lds-dual-ring 1.2s linear infinite;
|
||||||
|
}
|
||||||
|
@keyframes lds-dual-ring {
|
||||||
|
0% {
|
||||||
|
transform: rotate(0deg);
|
||||||
|
}
|
||||||
|
100% {
|
||||||
|
transform: rotate(360deg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
.ts-dropdown-content {
|
||||||
|
overflow: hidden auto;
|
||||||
|
max-height: 200px;
|
||||||
|
scroll-behavior: smooth;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ts-wrapper.plugin-drag_drop .ts-dragging {
|
||||||
|
color: transparent !important;
|
||||||
|
}
|
||||||
|
.ts-wrapper.plugin-drag_drop .ts-dragging > * {
|
||||||
|
visibility: hidden !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.plugin-checkbox_options:not(.rtl) .option input {
|
||||||
|
margin-right: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.plugin-checkbox_options.rtl .option input {
|
||||||
|
margin-left: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* stylelint-disable function-name-case */
|
||||||
|
.plugin-clear_button {
|
||||||
|
--ts-pr-clear-button: 1em;
|
||||||
|
}
|
||||||
|
.plugin-clear_button .clear-button {
|
||||||
|
opacity: 0;
|
||||||
|
position: absolute;
|
||||||
|
top: 50%;
|
||||||
|
transform: translateY(-50%);
|
||||||
|
right: calc(8px - 6px);
|
||||||
|
margin-right: 0 !important;
|
||||||
|
background: transparent !important;
|
||||||
|
transition: opacity 0.5s;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
.plugin-clear_button.form-select .clear-button, .plugin-clear_button.single .clear-button {
|
||||||
|
right: max(var(--ts-pr-caret), 8px);
|
||||||
|
}
|
||||||
|
.plugin-clear_button.focus.has-items .clear-button, .plugin-clear_button:not(.disabled):hover.has-items .clear-button {
|
||||||
|
opacity: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ts-wrapper .dropdown-header {
|
||||||
|
position: relative;
|
||||||
|
padding: 10px 8px;
|
||||||
|
border-bottom: 1px solid #d0d0d0;
|
||||||
|
background: color-mix(#fff, #d0d0d0, 85%);
|
||||||
|
border-radius: 3px 3px 0 0;
|
||||||
|
}
|
||||||
|
.ts-wrapper .dropdown-header-close {
|
||||||
|
position: absolute;
|
||||||
|
right: 8px;
|
||||||
|
top: 50%;
|
||||||
|
color: #303030;
|
||||||
|
opacity: 0.4;
|
||||||
|
margin-top: -12px;
|
||||||
|
line-height: 20px;
|
||||||
|
font-size: 20px !important;
|
||||||
|
}
|
||||||
|
.ts-wrapper .dropdown-header-close:hover {
|
||||||
|
color: black;
|
||||||
|
}
|
||||||
|
|
||||||
|
.plugin-dropdown_input.focus.dropdown-active .ts-control {
|
||||||
|
box-shadow: none;
|
||||||
|
border: 1px solid #d0d0d0;
|
||||||
|
}
|
||||||
|
.plugin-dropdown_input .dropdown-input {
|
||||||
|
border: 1px solid #d0d0d0;
|
||||||
|
border-width: 0 0 1px;
|
||||||
|
display: block;
|
||||||
|
padding: 8px 8px;
|
||||||
|
box-shadow: none;
|
||||||
|
width: 100%;
|
||||||
|
background: transparent;
|
||||||
|
}
|
||||||
|
.plugin-dropdown_input .items-placeholder {
|
||||||
|
border: 0 none !important;
|
||||||
|
box-shadow: none !important;
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
.plugin-dropdown_input.has-items .items-placeholder, .plugin-dropdown_input.dropdown-active .items-placeholder {
|
||||||
|
display: none !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ts-wrapper.plugin-input_autogrow.has-items .ts-control > input {
|
||||||
|
min-width: 0;
|
||||||
|
}
|
||||||
|
.ts-wrapper.plugin-input_autogrow.has-items.focus .ts-control > input {
|
||||||
|
flex: none;
|
||||||
|
min-width: 4px;
|
||||||
|
}
|
||||||
|
.ts-wrapper.plugin-input_autogrow.has-items.focus .ts-control > input::-ms-input-placeholder {
|
||||||
|
color: transparent;
|
||||||
|
}
|
||||||
|
.ts-wrapper.plugin-input_autogrow.has-items.focus .ts-control > input::placeholder {
|
||||||
|
color: transparent;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ts-dropdown.plugin-optgroup_columns .ts-dropdown-content {
|
||||||
|
display: flex;
|
||||||
|
}
|
||||||
|
.ts-dropdown.plugin-optgroup_columns .optgroup {
|
||||||
|
border-right: 1px solid #f2f2f2;
|
||||||
|
border-top: 0 none;
|
||||||
|
flex-grow: 1;
|
||||||
|
flex-basis: 0;
|
||||||
|
min-width: 0;
|
||||||
|
}
|
||||||
|
.ts-dropdown.plugin-optgroup_columns .optgroup:last-child {
|
||||||
|
border-right: 0 none;
|
||||||
|
}
|
||||||
|
.ts-dropdown.plugin-optgroup_columns .optgroup::before {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
.ts-dropdown.plugin-optgroup_columns .optgroup-header {
|
||||||
|
border-top: 0 none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ts-wrapper.plugin-remove_button .item {
|
||||||
|
display: inline-flex;
|
||||||
|
align-items: center;
|
||||||
|
}
|
||||||
|
.ts-wrapper.plugin-remove_button .item .remove {
|
||||||
|
color: inherit;
|
||||||
|
text-decoration: none;
|
||||||
|
vertical-align: middle;
|
||||||
|
display: inline-block;
|
||||||
|
padding: 0 6px;
|
||||||
|
border-radius: 0 2px 2px 0;
|
||||||
|
box-sizing: border-box;
|
||||||
|
}
|
||||||
|
.ts-wrapper.plugin-remove_button .item .remove:hover {
|
||||||
|
background: rgba(0, 0, 0, 0.05);
|
||||||
|
}
|
||||||
|
.ts-wrapper.plugin-remove_button.disabled .item .remove:hover {
|
||||||
|
background: none;
|
||||||
|
}
|
||||||
|
.ts-wrapper.plugin-remove_button .remove-single {
|
||||||
|
position: absolute;
|
||||||
|
right: 0;
|
||||||
|
top: 0;
|
||||||
|
font-size: 23px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ts-wrapper.plugin-remove_button:not(.rtl) .item {
|
||||||
|
padding-right: 0 !important;
|
||||||
|
}
|
||||||
|
.ts-wrapper.plugin-remove_button:not(.rtl) .item .remove {
|
||||||
|
border-left: 1px solid #d0d0d0;
|
||||||
|
margin-left: 6px;
|
||||||
|
}
|
||||||
|
.ts-wrapper.plugin-remove_button:not(.rtl) .item.active .remove {
|
||||||
|
border-left-color: #cacaca;
|
||||||
|
}
|
||||||
|
.ts-wrapper.plugin-remove_button:not(.rtl).disabled .item .remove {
|
||||||
|
border-left-color: white;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ts-wrapper.plugin-remove_button.rtl .item {
|
||||||
|
padding-left: 0 !important;
|
||||||
|
}
|
||||||
|
.ts-wrapper.plugin-remove_button.rtl .item .remove {
|
||||||
|
border-right: 1px solid #d0d0d0;
|
||||||
|
margin-right: 6px;
|
||||||
|
}
|
||||||
|
.ts-wrapper.plugin-remove_button.rtl .item.active .remove {
|
||||||
|
border-right-color: #cacaca;
|
||||||
|
}
|
||||||
|
.ts-wrapper.plugin-remove_button.rtl.disabled .item .remove {
|
||||||
|
border-right-color: white;
|
||||||
|
}
|
||||||
|
|
||||||
|
:root {
|
||||||
|
--ts-pr-clear-button: 0;
|
||||||
|
--ts-pr-caret: 0;
|
||||||
|
--ts-pr-min: .75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ts-wrapper.single .ts-control, .ts-wrapper.single .ts-control input {
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ts-control:not(.rtl) {
|
||||||
|
padding-right: max(var(--ts-pr-min), var(--ts-pr-clear-button) + var(--ts-pr-caret)) !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ts-control.rtl {
|
||||||
|
padding-left: max(var(--ts-pr-min), var(--ts-pr-clear-button) + var(--ts-pr-caret)) !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ts-wrapper {
|
||||||
|
position: relative;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ts-dropdown,
|
||||||
|
.ts-control,
|
||||||
|
.ts-control input {
|
||||||
|
color: #303030;
|
||||||
|
font-family: inherit;
|
||||||
|
font-size: 13px;
|
||||||
|
line-height: 18px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ts-control,
|
||||||
|
.ts-wrapper.single.input-active .ts-control {
|
||||||
|
background: #fff;
|
||||||
|
cursor: text;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ts-hidden-accessible {
|
||||||
|
border: 0 !important;
|
||||||
|
clip: rect(0 0 0 0) !important;
|
||||||
|
-webkit-clip-path: inset(50%) !important;
|
||||||
|
clip-path: inset(50%) !important;
|
||||||
|
overflow: hidden !important;
|
||||||
|
padding: 0 !important;
|
||||||
|
position: absolute !important;
|
||||||
|
width: 1px !important;
|
||||||
|
white-space: nowrap !important;
|
||||||
|
}
|
||||||
|
/*# sourceMappingURL=tom-select.css.map */
|
||||||
@@ -7,6 +7,9 @@ class AnalyticsController < ApplicationController
|
|||||||
def index
|
def index
|
||||||
authorize :analytics, :index?
|
authorize :analytics, :index?
|
||||||
|
|
||||||
|
# Track overall request time
|
||||||
|
request_start = Time.current
|
||||||
|
|
||||||
# Time period selector (default: last 24 hours)
|
# Time period selector (default: last 24 hours)
|
||||||
@time_period = params[:period]&.to_sym || :day
|
@time_period = params[:period]&.to_sym || :day
|
||||||
@start_time = calculate_start_time(@time_period)
|
@start_time = calculate_start_time(@time_period)
|
||||||
@@ -23,10 +26,13 @@ class AnalyticsController < ApplicationController
|
|||||||
# Cache key includes period and start_time (hour-aligned for consistency)
|
# Cache key includes period and start_time (hour-aligned for consistency)
|
||||||
cache_key_base = "analytics/#{@time_period}/#{@start_time.to_i}"
|
cache_key_base = "analytics/#{@time_period}/#{@start_time.to_i}"
|
||||||
|
|
||||||
# Core statistics - cached
|
# Core statistics - cached (uses DuckDB if available)
|
||||||
|
stat_start = Time.current
|
||||||
@total_events = Rails.cache.fetch("#{cache_key_base}/total_events", expires_in: cache_ttl) do
|
@total_events = Rails.cache.fetch("#{cache_key_base}/total_events", expires_in: cache_ttl) do
|
||||||
Event.where("timestamp >= ?", @start_time).count
|
with_duckdb_fallback { EventDdb.count_since(@start_time) } ||
|
||||||
|
Event.where("timestamp >= ?", @start_time).count
|
||||||
end
|
end
|
||||||
|
Rails.logger.info "[Analytics Perf] Total events: #{((Time.current - stat_start) * 1000).round(1)}ms"
|
||||||
|
|
||||||
@total_rules = Rails.cache.fetch("analytics/total_rules", expires_in: 5.minutes) do
|
@total_rules = Rails.cache.fetch("analytics/total_rules", expires_in: 5.minutes) do
|
||||||
Rule.enabled.count
|
Rule.enabled.count
|
||||||
@@ -40,40 +46,40 @@ class AnalyticsController < ApplicationController
|
|||||||
NetworkRange.count
|
NetworkRange.count
|
||||||
end
|
end
|
||||||
|
|
||||||
# Event breakdown by action - cached
|
# Event breakdown by action - cached (uses DuckDB if available)
|
||||||
|
stat_start = Time.current
|
||||||
@event_breakdown = Rails.cache.fetch("#{cache_key_base}/event_breakdown", expires_in: cache_ttl) do
|
@event_breakdown = Rails.cache.fetch("#{cache_key_base}/event_breakdown", expires_in: cache_ttl) do
|
||||||
Event.where("timestamp >= ?", @start_time)
|
with_duckdb_fallback { EventDdb.breakdown_by_action(@start_time) } ||
|
||||||
.group(:waf_action)
|
Event.where("timestamp >= ?", @start_time)
|
||||||
.count
|
.group(:waf_action)
|
||||||
.transform_keys do |action_id|
|
.count
|
||||||
case action_id
|
|
||||||
when 0 then 'allow'
|
|
||||||
when 1 then 'deny'
|
|
||||||
when 2 then 'redirect'
|
|
||||||
when 3 then 'challenge'
|
|
||||||
else 'unknown'
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
Rails.logger.info "[Analytics Perf] Event breakdown: #{((Time.current - stat_start) * 1000).round(1)}ms"
|
||||||
|
|
||||||
# Top countries by event count - cached (now uses denormalized country column)
|
# Top countries by event count - cached (uses DuckDB if available)
|
||||||
|
stat_start = Time.current
|
||||||
@top_countries = Rails.cache.fetch("#{cache_key_base}/top_countries", expires_in: cache_ttl) do
|
@top_countries = Rails.cache.fetch("#{cache_key_base}/top_countries", expires_in: cache_ttl) do
|
||||||
Event.where("timestamp >= ? AND country IS NOT NULL", @start_time)
|
with_duckdb_fallback { EventDdb.top_countries(@start_time, 10) } ||
|
||||||
.group(:country)
|
Event.where("timestamp >= ? AND country IS NOT NULL", @start_time)
|
||||||
.count
|
.group(:country)
|
||||||
.sort_by { |_, count| -count }
|
.count
|
||||||
.first(10)
|
.sort_by { |_, count| -count }
|
||||||
|
.first(10)
|
||||||
end
|
end
|
||||||
|
Rails.logger.info "[Analytics Perf] Top countries: #{((Time.current - stat_start) * 1000).round(1)}ms"
|
||||||
|
|
||||||
# Top blocked IPs - cached
|
# Top blocked IPs - cached (uses DuckDB if available)
|
||||||
|
stat_start = Time.current
|
||||||
@top_blocked_ips = Rails.cache.fetch("#{cache_key_base}/top_blocked_ips", expires_in: cache_ttl) do
|
@top_blocked_ips = Rails.cache.fetch("#{cache_key_base}/top_blocked_ips", expires_in: cache_ttl) do
|
||||||
Event.where("timestamp >= ?", @start_time)
|
with_duckdb_fallback { EventDdb.top_blocked_ips(@start_time, 10) } ||
|
||||||
.where(waf_action: 1) # deny action in enum
|
Event.where("timestamp >= ?", @start_time)
|
||||||
.group(:ip_address)
|
.where(waf_action: 0) # deny action in enum
|
||||||
.count
|
.group(:ip_address)
|
||||||
.sort_by { |_, count| -count }
|
.count
|
||||||
.first(10)
|
.sort_by { |_, count| -count }
|
||||||
|
.first(10)
|
||||||
end
|
end
|
||||||
|
Rails.logger.info "[Analytics Perf] Top blocked IPs: #{((Time.current - stat_start) * 1000).round(1)}ms"
|
||||||
|
|
||||||
# Network range intelligence breakdown - cached
|
# Network range intelligence breakdown - cached
|
||||||
@network_intelligence = Rails.cache.fetch("analytics/network_intelligence", expires_in: 10.minutes) do
|
@network_intelligence = Rails.cache.fetch("analytics/network_intelligence", expires_in: 10.minutes) do
|
||||||
@@ -100,7 +106,7 @@ class AnalyticsController < ApplicationController
|
|||||||
total_users: User.count,
|
total_users: User.count,
|
||||||
active_rules: Rule.enabled.count,
|
active_rules: Rule.enabled.count,
|
||||||
disabled_rules: Rule.where(enabled: false).count,
|
disabled_rules: Rule.where(enabled: false).count,
|
||||||
recent_errors: Event.where("timestamp >= ? AND waf_action = ?", @start_time, 1).count # 1 = deny
|
recent_errors: Event.where("timestamp >= ? AND waf_action = ?", @start_time, 0).count # 0 = deny
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -110,7 +116,11 @@ class AnalyticsController < ApplicationController
|
|||||||
end
|
end
|
||||||
|
|
||||||
# Prepare data for charts - split caching for current vs historical data
|
# Prepare data for charts - split caching for current vs historical data
|
||||||
|
stat_start = Time.current
|
||||||
@chart_data = prepare_chart_data_with_split_cache(cache_key_base, cache_ttl)
|
@chart_data = prepare_chart_data_with_split_cache(cache_key_base, cache_ttl)
|
||||||
|
Rails.logger.info "[Analytics Perf] Chart data: #{((Time.current - stat_start) * 1000).round(1)}ms"
|
||||||
|
|
||||||
|
Rails.logger.info "[Analytics Perf] TOTAL REQUEST: #{((Time.current - request_start) * 1000).round(1)}ms"
|
||||||
|
|
||||||
respond_to do |format|
|
respond_to do |format|
|
||||||
format.html
|
format.html
|
||||||
@@ -125,38 +135,90 @@ class AnalyticsController < ApplicationController
|
|||||||
@time_period = params[:period]&.to_sym || :day
|
@time_period = params[:period]&.to_sym || :day
|
||||||
@start_time = calculate_start_time(@time_period)
|
@start_time = calculate_start_time(@time_period)
|
||||||
|
|
||||||
# Top networks by request volume (using denormalized network_range_id)
|
# Top networks by request volume - use DuckDB if available
|
||||||
# Use a subquery approach to avoid PostgreSQL GROUP BY issues with network_ranges.*
|
network_stats = with_duckdb_fallback { EventDdb.top_networks(@start_time, 50) }
|
||||||
event_stats = Event.where("timestamp >= ?", @start_time)
|
|
||||||
.where.not(network_range_id: nil)
|
|
||||||
.group(:network_range_id)
|
|
||||||
.select("network_range_id, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips")
|
|
||||||
|
|
||||||
# Join the stats back to NetworkRange to get full network details
|
if network_stats
|
||||||
@top_networks = NetworkRange.joins("INNER JOIN (#{event_stats.to_sql}) stats ON stats.network_range_id = network_ranges.id")
|
# DuckDB path: array format [network_range_id, event_count, unique_ips]
|
||||||
.select("network_ranges.*, stats.event_count, stats.unique_ips")
|
network_ids = network_stats.map { |row| row[0] }
|
||||||
.order("stats.event_count DESC")
|
stats_by_id = network_stats.to_h { |row| [row[0], { event_count: row[1], unique_ips: row[2] }] }
|
||||||
.limit(50)
|
|
||||||
|
@top_networks = NetworkRange.where(id: network_ids)
|
||||||
|
.to_a
|
||||||
|
.map do |network|
|
||||||
|
stats = stats_by_id[network.id]
|
||||||
|
network.define_singleton_method(:event_count) { stats[:event_count] }
|
||||||
|
network.define_singleton_method(:unique_ips) { stats[:unique_ips] }
|
||||||
|
|
||||||
|
# Add inherited intelligence support
|
||||||
|
intelligence = network.inherited_intelligence
|
||||||
|
if intelligence[:inherited]
|
||||||
|
network.define_singleton_method(:display_company) { intelligence[:company] }
|
||||||
|
network.define_singleton_method(:display_country) { intelligence[:country] }
|
||||||
|
network.define_singleton_method(:inherited_from) { intelligence[:parent_cidr] }
|
||||||
|
network.define_singleton_method(:has_inherited_data?) { true }
|
||||||
|
else
|
||||||
|
network.define_singleton_method(:display_company) { network.company }
|
||||||
|
network.define_singleton_method(:display_country) { network.country }
|
||||||
|
network.define_singleton_method(:inherited_from) { nil }
|
||||||
|
network.define_singleton_method(:has_inherited_data?) { false }
|
||||||
|
end
|
||||||
|
|
||||||
|
network
|
||||||
|
end
|
||||||
|
.sort_by { |n| -n.event_count }
|
||||||
|
else
|
||||||
|
# PostgreSQL fallback
|
||||||
|
event_stats = Event.where("timestamp >= ?", @start_time)
|
||||||
|
.where.not(network_range_id: nil)
|
||||||
|
.group(:network_range_id)
|
||||||
|
.select("network_range_id, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips")
|
||||||
|
|
||||||
|
@top_networks = NetworkRange.joins("INNER JOIN (#{event_stats.to_sql}) stats ON stats.network_range_id = network_ranges.id")
|
||||||
|
.select("network_ranges.*, stats.event_count, stats.unique_ips")
|
||||||
|
.order("stats.event_count DESC")
|
||||||
|
.limit(50)
|
||||||
|
|
||||||
|
# Add inherited intelligence support for PostgreSQL fallback
|
||||||
|
@top_networks = @top_networks.to_a.map do |network|
|
||||||
|
intelligence = network.inherited_intelligence
|
||||||
|
if intelligence[:inherited]
|
||||||
|
network.define_singleton_method(:display_company) { intelligence[:company] }
|
||||||
|
network.define_singleton_method(:display_country) { intelligence[:country] }
|
||||||
|
network.define_singleton_method(:inherited_from) { intelligence[:parent_cidr] }
|
||||||
|
network.define_singleton_method(:has_inherited_data?) { true }
|
||||||
|
else
|
||||||
|
network.define_singleton_method(:display_company) { network.company }
|
||||||
|
network.define_singleton_method(:display_country) { network.country }
|
||||||
|
network.define_singleton_method(:inherited_from) { nil }
|
||||||
|
network.define_singleton_method(:has_inherited_data?) { false }
|
||||||
|
end
|
||||||
|
network
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
# Network type breakdown with traffic stats
|
# Network type breakdown with traffic stats
|
||||||
@network_breakdown = calculate_network_type_stats(@start_time)
|
@network_breakdown = calculate_network_type_stats(@start_time)
|
||||||
|
|
||||||
# Company breakdown for top traffic sources (using denormalized company column)
|
# Company breakdown for top traffic sources - use DuckDB if available
|
||||||
@top_companies = Event.where("timestamp >= ? AND company IS NOT NULL", @start_time)
|
@top_companies = with_duckdb_fallback { EventDdb.top_companies(@start_time, 20) } ||
|
||||||
|
Event.where("timestamp >= ? AND company IS NOT NULL", @start_time)
|
||||||
.group(:company)
|
.group(:company)
|
||||||
.select("company, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips, COUNT(DISTINCT network_range_id) as network_count")
|
.select("company, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips, COUNT(DISTINCT network_range_id) as network_count")
|
||||||
.order("event_count DESC")
|
.order("event_count DESC")
|
||||||
.limit(20)
|
.limit(20)
|
||||||
|
|
||||||
# ASN breakdown (using denormalized asn columns)
|
# ASN breakdown - use DuckDB if available
|
||||||
@top_asns = Event.where("timestamp >= ? AND asn IS NOT NULL", @start_time)
|
@top_asns = with_duckdb_fallback { EventDdb.top_asns(@start_time, 15) } ||
|
||||||
|
Event.where("timestamp >= ? AND asn IS NOT NULL", @start_time)
|
||||||
.group(:asn, :asn_org)
|
.group(:asn, :asn_org)
|
||||||
.select("asn, asn_org, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips")
|
.select("asn, asn_org, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips, COUNT(DISTINCT network_range_id) as network_count")
|
||||||
.order("event_count DESC")
|
.order("event_count DESC")
|
||||||
.limit(15)
|
.limit(15)
|
||||||
|
|
||||||
# Geographic breakdown (using denormalized country column)
|
# Geographic breakdown - use DuckDB if available
|
||||||
@top_countries = Event.where("timestamp >= ? AND country IS NOT NULL", @start_time)
|
@top_countries = with_duckdb_fallback { EventDdb.top_countries_with_stats(@start_time, 15) } ||
|
||||||
|
Event.where("timestamp >= ? AND country IS NOT NULL", @start_time)
|
||||||
.group(:country)
|
.group(:country)
|
||||||
.select("country, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips")
|
.select("country, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips")
|
||||||
.order("event_count DESC")
|
.order("event_count DESC")
|
||||||
@@ -199,12 +261,15 @@ class AnalyticsController < ApplicationController
|
|||||||
# Historical hours are cached for full TTL, current hour cached briefly for freshness
|
# Historical hours are cached for full TTL, current hour cached briefly for freshness
|
||||||
|
|
||||||
# Cache historical hours (1-23 hours ago) - these are complete and won't change
|
# Cache historical hours (1-23 hours ago) - these are complete and won't change
|
||||||
# No expiration - will stick around until evicted by cache store
|
# No expiration - will stick around until evicted by cache store (uses DuckDB if available)
|
||||||
historical_timeline = Rails.cache.fetch("#{cache_key_base}/chart_historical") do
|
historical_timeline = Rails.cache.fetch("#{cache_key_base}/chart_historical") do
|
||||||
historical_start = 23.hours.ago.beginning_of_hour
|
historical_start = 23.hours.ago.beginning_of_hour
|
||||||
events_by_hour = Event.where("timestamp >= ? AND timestamp < ?", historical_start, Time.current.beginning_of_hour)
|
current_hour_start = Time.current.beginning_of_hour
|
||||||
.group("DATE_TRUNC('hour', timestamp)")
|
|
||||||
.count
|
events_by_hour = with_duckdb_fallback { EventDdb.hourly_timeline(historical_start, current_hour_start) } ||
|
||||||
|
Event.where("timestamp >= ? AND timestamp < ?", historical_start, current_hour_start)
|
||||||
|
.group("DATE_TRUNC('hour', timestamp)")
|
||||||
|
.count
|
||||||
|
|
||||||
(1..23).map do |hour_ago|
|
(1..23).map do |hour_ago|
|
||||||
hour_time = hour_ago.hours.ago.beginning_of_hour
|
hour_time = hour_ago.hours.ago.beginning_of_hour
|
||||||
@@ -217,6 +282,7 @@ class AnalyticsController < ApplicationController
|
|||||||
end
|
end
|
||||||
|
|
||||||
# Current hour (0 hours ago) - cache very briefly since it's actively accumulating
|
# Current hour (0 hours ago) - cache very briefly since it's actively accumulating
|
||||||
|
# ALWAYS use PostgreSQL for current hour to get real-time data (DuckDB syncs every minute)
|
||||||
current_hour_data = Rails.cache.fetch("#{cache_key_base}/chart_current_hour", expires_in: 1.minute) do
|
current_hour_data = Rails.cache.fetch("#{cache_key_base}/chart_current_hour", expires_in: 1.minute) do
|
||||||
hour_time = Time.current.beginning_of_hour
|
hour_time = Time.current.beginning_of_hour
|
||||||
count = Event.where("timestamp >= ?", hour_time).count
|
count = Event.where("timestamp >= ?", hour_time).count
|
||||||
@@ -298,6 +364,12 @@ class AnalyticsController < ApplicationController
|
|||||||
end
|
end
|
||||||
|
|
||||||
def calculate_network_type_stats(start_time)
|
def calculate_network_type_stats(start_time)
|
||||||
|
# Try DuckDB first, fallback to PostgreSQL
|
||||||
|
duckdb_stats = with_duckdb_fallback { EventDdb.network_type_stats(start_time) }
|
||||||
|
|
||||||
|
return duckdb_stats if duckdb_stats
|
||||||
|
|
||||||
|
# PostgreSQL fallback
|
||||||
# Get all network types with their traffic statistics using denormalized columns
|
# Get all network types with their traffic statistics using denormalized columns
|
||||||
network_types = [
|
network_types = [
|
||||||
{ type: 'datacenter', label: 'Datacenter', column: :is_datacenter },
|
{ type: 'datacenter', label: 'Datacenter', column: :is_datacenter },
|
||||||
@@ -341,6 +413,12 @@ class AnalyticsController < ApplicationController
|
|||||||
end
|
end
|
||||||
|
|
||||||
def calculate_suspicious_patterns(start_time)
|
def calculate_suspicious_patterns(start_time)
|
||||||
|
# Try DuckDB first, fallback to PostgreSQL
|
||||||
|
duckdb_patterns = with_duckdb_fallback { EventDdb.suspicious_patterns(start_time) }
|
||||||
|
|
||||||
|
return duckdb_patterns if duckdb_patterns
|
||||||
|
|
||||||
|
# PostgreSQL fallback
|
||||||
patterns = {}
|
patterns = {}
|
||||||
|
|
||||||
# High volume networks (top 1% by request count) - using denormalized network_range_id
|
# High volume networks (top 1% by request count) - using denormalized network_range_id
|
||||||
@@ -366,9 +444,9 @@ class AnalyticsController < ApplicationController
|
|||||||
high_deny_networks = Event.where("timestamp >= ? AND network_range_id IS NOT NULL", start_time)
|
high_deny_networks = Event.where("timestamp >= ? AND network_range_id IS NOT NULL", start_time)
|
||||||
.group(:network_range_id)
|
.group(:network_range_id)
|
||||||
.select("network_range_id,
|
.select("network_range_id,
|
||||||
COUNT(CASE WHEN waf_action = 1 THEN 1 END) as denied_count,
|
COUNT(CASE WHEN waf_action = 0 THEN 1 END) as denied_count,
|
||||||
COUNT(*) as total_count")
|
COUNT(*) as total_count")
|
||||||
.having("COUNT(CASE WHEN waf_action = 1 THEN 1 END)::float / COUNT(*) > 0.5")
|
.having("COUNT(CASE WHEN waf_action = 0 THEN 1 END)::float / COUNT(*) > 0.5")
|
||||||
.having("COUNT(*) >= 10") # minimum threshold
|
.having("COUNT(*) >= 10") # minimum threshold
|
||||||
|
|
||||||
patterns[:high_deny_rate] = {
|
patterns[:high_deny_rate] = {
|
||||||
@@ -400,12 +478,14 @@ class AnalyticsController < ApplicationController
|
|||||||
{
|
{
|
||||||
id: network.id,
|
id: network.id,
|
||||||
cidr: network.cidr,
|
cidr: network.cidr,
|
||||||
company: network.company,
|
company: network.display_company,
|
||||||
asn: network.asn,
|
asn: network.asn,
|
||||||
country: network.country,
|
country: network.display_country,
|
||||||
network_type: network.network_type,
|
network_type: network.network_type,
|
||||||
event_count: network.event_count,
|
event_count: network.event_count,
|
||||||
unique_ips: network.unique_ips
|
unique_ips: network.unique_ips,
|
||||||
|
has_inherited_data: network.has_inherited_data?,
|
||||||
|
inherited_from: network.inherited_from
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
network_breakdown: @network_breakdown,
|
network_breakdown: @network_breakdown,
|
||||||
@@ -457,4 +537,27 @@ class AnalyticsController < ApplicationController
|
|||||||
}
|
}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Runs a DuckDB-backed query block and hands its result back to the caller.
#
# Yields to the given block and returns whatever the block returns.
# Returns nil when the block returns nil or raises a StandardError; the
# error is logged as a warning rather than propagated. Callers treat nil as
# the signal to run their PostgreSQL fallback (via `||` or a nil check).
def with_duckdb_fallback
  yield
rescue StandardError => e
  Rails.logger.warn "[Analytics] DuckDB query failed, falling back to PostgreSQL: #{e.message}"
  nil # nil tells the caller to run its PostgreSQL fallback
end
|
||||||
|
|
||||||
|
# Reports whether DuckDB holds recent enough data to be trusted.
#
# Returns true when the newest event timestamp reported by
# AnalyticsDuckdbService is no older than 2 minutes. Returns false when no
# timestamp is available or when the lookup raises a StandardError (the
# error is logged as a warning).
def duckdb_is_fresh?
  latest_event_at = AnalyticsDuckdbService.instance.newest_event_timestamp

  # The sync job is described as running every minute, so a 2-minute window
  # tolerates some lag before the data counts as stale.
  !latest_event_at.nil? && latest_event_at >= 2.minutes.ago
rescue StandardError => error
  Rails.logger.warn "[Analytics] Error checking DuckDB freshness: #{error.message}"
  false
end
|
||||||
end
|
end
|
||||||
126
app/controllers/bot_network_ranges_controller.rb
Normal file
126
app/controllers/bot_network_ranges_controller.rb
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# Admin-only management of network ranges imported from bot provider sources.
#
# Imported ranges are NetworkRange rows whose `source` starts with
# "bot_import_". Each import request is tracked with a DataImport record of
# import_type 'bot_network_ranges'.
class BotNetworkRangesController < ApplicationController
  # Batch size used when the request supplies none (or a non-positive one).
  DEFAULT_BATCH_SIZE = 1000

  before_action :authenticate_user!
  before_action :require_admin

  # Lists the known bot sources, the 10 latest imports and the 50 newest
  # imported ranges.
  def index
    @bot_sources = BotNetworkRangeImporter::BOT_SOURCES
    @recent_imports = DataImport.where(import_type: 'bot_network_ranges').order(created_at: :desc).limit(10)
    @bot_network_ranges = NetworkRange.where("source LIKE 'bot_import_%'").order(created_at: :desc).limit(50)
  end

  # Imports a single source synchronously so the admin gets immediate
  # feedback, records the outcome in a DataImport row, then redirects back
  # to the index. Any import error is reported through flash[:alert].
  def import
    source_key = params[:source]
    options = import_options

    if source_key.present?
      begin
        result = BotNetworkRangeImporter.import_from_source(source_key, options)

        DataImport.create!(
          import_type: 'bot_network_ranges',
          source: source_key.to_s,
          status: 'completed',
          records_processed: result[:imported],
          notes: "Imported from #{result[:source]}: #{result[:note] || 'Success'}"
        )

        flash[:notice] = "Successfully imported #{result[:imported]} ranges from #{result[:source]}"
      rescue StandardError => e
        flash[:alert] = "Failed to import from #{source_key}: #{e.message}"
      end
    else
      flash[:alert] = "Please select a source to import from"
    end

    redirect_to bot_network_ranges_path
  end

  # Queues a background import for a single source. A pending DataImport
  # row is created first and its id is passed to the job in the options.
  def import_async
    source_key = params[:source]
    options = import_options

    if source_key.present?
      data_import = DataImport.create!(
        import_type: 'bot_network_ranges',
        source: source_key.to_s,
        status: 'pending',
        records_processed: 0,
        notes: "Import job queued for #{source_key}"
      )

      ImportBotNetworkRangesJob.perform_later(source_key, options.merge(data_import_id: data_import.id))

      flash[:notice] = "Import job queued for #{source_key}. You'll be notified when it's complete."
    else
      flash[:alert] = "Please select a source to import from"
    end

    redirect_to bot_network_ranges_path
  end

  # Queues a background import covering every available source, tracked by
  # a single pending DataImport row.
  def import_all
    options = import_options

    data_import = DataImport.create!(
      import_type: 'bot_network_ranges',
      source: 'all_sources',
      status: 'pending',
      records_processed: 0,
      notes: "Batch import job queued for all available sources"
    )

    ImportAllBotNetworkRangesJob.perform_later(options.merge(data_import_id: data_import.id))

    flash[:notice] = "Batch import job queued for all sources. This may take several minutes."
    redirect_to bot_network_ranges_path
  end

  # Shows the ranges imported from one source (paginated, 50 per page) along
  # with a per-`source` row count.
  def show
    # to_s keeps a missing param from raising on to_sym below.
    source = params[:source].to_s
    ranges = bot_ranges_for(source)

    @network_ranges = ranges.order(created_at: :desc).page(params[:page]).per(50)
    @source_name = BotNetworkRangeImporter::BOT_SOURCES[source.to_sym]&.dig(:name) || params[:source]
    @import_stats = ranges.group(:source).count
  end

  # Deletes every range imported from the given source and reports how many
  # rows were removed.
  def destroy
    source = params[:source]
    deleted_count = bot_ranges_for(source).delete_all

    flash[:notice] = "Deleted #{deleted_count} network ranges from #{source}"
    redirect_to bot_network_ranges_path
  end

  private

  # Redirects non-admin (or anonymous) users to the root page.
  def require_admin
    redirect_to root_path, alert: 'Admin access required' unless current_user&.admin?
  end

  # Builds the NetworkRange scope for one bot source.
  #
  # The source comes straight from the request, so it is never interpolated
  # into SQL: LIKE wildcards are escaped and the pattern is passed as a bind
  # value. The match is a prefix match on "bot_import_<source>".
  def bot_ranges_for(source)
    prefix = NetworkRange.sanitize_sql_like("bot_import_#{source}")
    NetworkRange.where('source LIKE ?', "#{prefix}%")
  end

  # Collects importer options from the request parameters.
  #
  # Returns a Hash with :batch_size always set (DEFAULT_BATCH_SIZE when the
  # parameter is missing, zero, negative or non-numeric) and :aws_services
  # set to an Array of trimmed names when that comma-separated parameter is
  # present.
  def import_options
    options = {}

    # AWS-specific options
    if params[:aws_services].present?
      options[:aws_services] = params[:aws_services].split(',').map(&:strip)
    end

    # nil.to_i is 0, so a missing parameter falls through to the default
    # instead of calling #zero? on nil.
    requested_size = params[:batch_size].to_i
    options[:batch_size] = requested_size.positive? ? requested_size : DEFAULT_BATCH_SIZE

    options
  end
end
|
||||||
@@ -36,6 +36,9 @@ class EventsController < ApplicationController
|
|||||||
@events = @events.by_asn(params[:asn]) if params[:asn].present?
|
@events = @events.by_asn(params[:asn]) if params[:asn].present?
|
||||||
@events = @events.by_network_cidr(params[:network_cidr]) if params[:network_cidr].present?
|
@events = @events.by_network_cidr(params[:network_cidr]) if params[:network_cidr].present?
|
||||||
|
|
||||||
|
# Bot filtering
|
||||||
|
@events = @events.exclude_bots if params[:exclude_bots] == "true"
|
||||||
|
|
||||||
Rails.logger.debug "Events count after filtering: #{@events.count}"
|
Rails.logger.debug "Events count after filtering: #{@events.count}"
|
||||||
|
|
||||||
# Debug info
|
# Debug info
|
||||||
|
|||||||
@@ -46,8 +46,10 @@ class NetworkRangesController < ApplicationController
|
|||||||
authorize @network_range
|
authorize @network_range
|
||||||
|
|
||||||
if @network_range.persisted?
|
if @network_range.persisted?
|
||||||
# Real network - use direct IP containment for consistency with stats
|
# Real network - use indexed network_range_id for much better performance
|
||||||
events_scope = Event.where("ip_address <<= ?", @network_range.cidr).recent
|
# Include child network ranges to capture all traffic within this network block
|
||||||
|
network_ids = [@network_range.id] + @network_range.child_ranges.pluck(:id)
|
||||||
|
events_scope = Event.where(network_range_id: network_ids).recent
|
||||||
else
|
else
|
||||||
# Virtual network - find events by IP range containment
|
# Virtual network - find events by IP range containment
|
||||||
events_scope = Event.where("ip_address <<= ?::inet", @network_range.to_s).recent
|
events_scope = Event.where("ip_address <<= ?::inet", @network_range.to_s).recent
|
||||||
@@ -58,22 +60,24 @@ class NetworkRangesController < ApplicationController
|
|||||||
|
|
||||||
@child_ranges = @network_range.child_ranges.limit(20)
|
@child_ranges = @network_range.child_ranges.limit(20)
|
||||||
@parent_ranges = @network_range.parent_ranges.limit(10)
|
@parent_ranges = @network_range.parent_ranges.limit(10)
|
||||||
@associated_rules = @network_range.persisted? ? @network_range.rules.includes(:user).order(created_at: :desc) : []
|
@associated_rules = @network_range.persisted? ? @network_range.rules.includes(:user, :network_range, :waf_policy).order(created_at: :desc) : []
|
||||||
|
|
||||||
# Load rules from supernets and subnets
|
# Load rules from supernets and subnets
|
||||||
@supernet_rules = @network_range.persisted? ? @network_range.supernet_rules.includes(:network_range, :user).limit(10) : []
|
@supernet_rules = @network_range.persisted? ? @network_range.supernet_rules.includes(:network_range, :user, :waf_policy).limit(10) : []
|
||||||
@subnet_rules = @network_range.persisted? ? @network_range.child_rules.includes(:network_range, :user).limit(20) : []
|
@subnet_rules = @network_range.persisted? ? @network_range.child_rules.includes(:network_range, :user, :waf_policy).limit(20) : []
|
||||||
|
|
||||||
# Traffic analytics (if we have events)
|
# Traffic analytics (if we have events)
|
||||||
@traffic_stats = calculate_traffic_stats(@network_range)
|
@traffic_stats = calculate_traffic_stats(@network_range)
|
||||||
|
|
||||||
# Check if we have IPAPI data (or if parent has it)
|
# Check if we have IPAPI data (or if parent has it) - cache expensive parent lookup
|
||||||
@has_ipapi_data = @network_range.has_network_data_from?(:ipapi)
|
@has_ipapi_data = @network_range.has_network_data_from?(:ipapi)
|
||||||
@parent_with_ipapi = nil
|
@parent_with_ipapi = nil
|
||||||
|
|
||||||
unless @has_ipapi_data
|
unless @has_ipapi_data
|
||||||
# Check if parent has IPAPI data
|
# Cache expensive parent intelligence lookup
|
||||||
parent = @network_range.parent_with_intelligence
|
parent = Rails.cache.fetch("network_parent_intel:#{@network_range.cache_key}", expires_in: 1.hour) do
|
||||||
|
@network_range.parent_with_intelligence
|
||||||
|
end
|
||||||
if parent&.has_network_data_from?(:ipapi)
|
if parent&.has_network_data_from?(:ipapi)
|
||||||
@parent_with_ipapi = parent
|
@parent_with_ipapi = parent
|
||||||
@has_ipapi_data = true
|
@has_ipapi_data = true
|
||||||
@@ -194,6 +198,15 @@ class NetworkRangesController < ApplicationController
|
|||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
|
# Runs a DuckDB-backed query block and hands its result back to the caller.
#
# Yields to the given block and returns whatever the block returns.
# Returns nil when the block returns nil or raises a StandardError; the
# error is logged as a warning rather than propagated. Callers treat nil as
# the signal to run their PostgreSQL fallback.
def with_duckdb_fallback
  yield
rescue StandardError => e
  Rails.logger.warn "[NetworkRanges] DuckDB query failed, falling back to PostgreSQL: #{e.message}"
  nil # nil tells the caller to run its PostgreSQL fallback
end
|
||||||
|
|
||||||
def set_network_range
|
def set_network_range
|
||||||
# Handle CIDR slugs (e.g., "40.77.167.100_32" -> "40.77.167.100/32")
|
# Handle CIDR slugs (e.g., "40.77.167.100_32" -> "40.77.167.100/32")
|
||||||
cidr = params[:id].gsub('_', '/')
|
cidr = params[:id].gsub('_', '/')
|
||||||
@@ -245,28 +258,40 @@ class NetworkRangesController < ApplicationController
|
|||||||
if network_range.persisted?
|
if network_range.persisted?
|
||||||
# Real network - use cached events_count for total requests (much more performant)
|
# Real network - use cached events_count for total requests (much more performant)
|
||||||
if network_range.events_count > 0
|
if network_range.events_count > 0
|
||||||
# Base query for consistent IP containment logic
|
# Use indexed network_range_id for much better performance instead of expensive CIDR operator
|
||||||
base_query = Event.where("ip_address <<= ?", network_range.cidr)
|
# Include child network ranges to capture all traffic within this network block
|
||||||
|
network_ids = [network_range.id] + network_range.child_ranges.pluck(:id)
|
||||||
|
|
||||||
# Use separate queries: one for grouping (without ordering), one for recent activity (with ordering)
|
# Try DuckDB first for stats (much faster)
|
||||||
events_for_grouping = base_query.limit(1000)
|
duckdb_stats = with_duckdb_fallback { EventDdb.network_traffic_stats(network_ids) }
|
||||||
events_for_activity = base_query.recent.limit(20)
|
duckdb_top_paths = with_duckdb_fallback { EventDdb.network_top_paths(network_ids, 10) }
|
||||||
|
duckdb_top_agents = with_duckdb_fallback { EventDdb.network_top_user_agents(network_ids, 5) }
|
||||||
|
|
||||||
# Calculate counts properly - use consistent base_query for all counts
|
if duckdb_stats
|
||||||
total_requests = base_query.count
|
# DuckDB success - use fast aggregated stats
|
||||||
unique_ips = base_query.except(:order).distinct.count(:ip_address)
|
stats = duckdb_stats.merge(
|
||||||
blocked_requests = base_query.blocked.count
|
top_paths: duckdb_top_paths&.to_h || {},
|
||||||
allowed_requests = base_query.allowed.count
|
top_user_agents: duckdb_top_agents&.to_h || {},
|
||||||
|
recent_activity: Event.where(network_range_id: network_ids).recent.limit(20)
|
||||||
|
)
|
||||||
|
else
|
||||||
|
# PostgreSQL fallback
|
||||||
|
base_query = Event.where(network_range_id: network_ids)
|
||||||
|
events_for_grouping = base_query.limit(1000)
|
||||||
|
events_for_activity = base_query.recent.limit(20)
|
||||||
|
|
||||||
{
|
stats = {
|
||||||
total_requests: total_requests,
|
total_requests: base_query.count,
|
||||||
unique_ips: unique_ips,
|
unique_ips: base_query.except(:order).distinct.count(:ip_address),
|
||||||
blocked_requests: blocked_requests,
|
blocked_requests: base_query.blocked.count,
|
||||||
allowed_requests: allowed_requests,
|
allowed_requests: base_query.allowed.count,
|
||||||
top_paths: events_for_grouping.group(:request_path).count.sort_by { |_, count| -count }.first(10),
|
top_paths: events_for_grouping.group(:request_path).count.sort_by { |_, count| -count }.first(10).to_h,
|
||||||
top_user_agents: events_for_grouping.group(:user_agent).count.sort_by { |_, count| -count }.first(5),
|
top_user_agents: events_for_grouping.group(:user_agent).count.sort_by { |_, count| -count }.first(5).to_h,
|
||||||
recent_activity: events_for_activity
|
recent_activity: events_for_activity
|
||||||
}
|
}
|
||||||
|
end
|
||||||
|
|
||||||
|
stats
|
||||||
else
|
else
|
||||||
# No events - return empty stats
|
# No events - return empty stats
|
||||||
{
|
{
|
||||||
@@ -294,8 +319,8 @@ class NetworkRangesController < ApplicationController
|
|||||||
unique_ips: base_query.except(:order).distinct.count(:ip_address),
|
unique_ips: base_query.except(:order).distinct.count(:ip_address),
|
||||||
blocked_requests: base_query.blocked.count,
|
blocked_requests: base_query.blocked.count,
|
||||||
allowed_requests: base_query.allowed.count,
|
allowed_requests: base_query.allowed.count,
|
||||||
top_paths: events_for_grouping.group(:request_path).count.sort_by { |_, count| -count }.first(10),
|
top_paths: events_for_grouping.group(:request_path).count.sort_by { |_, count| -count }.first(10).to_h,
|
||||||
top_user_agents: events_for_grouping.group(:user_agent).count.sort_by { |_, count| -count }.first(5),
|
top_user_agents: events_for_grouping.group(:user_agent).count.sort_by { |_, count| -count }.first(5).to_h,
|
||||||
recent_activity: events_for_activity
|
recent_activity: events_for_activity
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|||||||
@@ -46,12 +46,9 @@ class RulesController < ApplicationController
|
|||||||
process_quick_create_parameters
|
process_quick_create_parameters
|
||||||
|
|
||||||
# Handle network range creation if CIDR is provided
|
# Handle network range creation if CIDR is provided
|
||||||
if params[:cidr].present? && @rule.network_rule?
|
cidr_param = params[:new_cidr].presence || params[:cidr].presence
|
||||||
network_range = NetworkRange.find_or_create_by(cidr: params[:cidr]) do |range|
|
if cidr_param.present? && @rule.network_rule?
|
||||||
range.user = Current.user
|
network_range = NetworkRange.find_or_create_by_cidr(cidr_param, user: Current.user, source: 'manual')
|
||||||
range.source = 'manual'
|
|
||||||
range.creation_reason = "Created for rule ##{@rule.id}"
|
|
||||||
end
|
|
||||||
@rule.network_range = network_range
|
@rule.network_range = network_range
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -132,7 +129,9 @@ class RulesController < ApplicationController
|
|||||||
:expires_at,
|
:expires_at,
|
||||||
:enabled,
|
:enabled,
|
||||||
:source,
|
:source,
|
||||||
:network_range_id
|
:network_range_id,
|
||||||
|
:header_name,
|
||||||
|
:header_value
|
||||||
]
|
]
|
||||||
|
|
||||||
# Only include conditions for non-network rules
|
# Only include conditions for non-network rules
|
||||||
@@ -250,15 +249,18 @@ def process_quick_create_parameters
|
|||||||
})
|
})
|
||||||
end
|
end
|
||||||
|
|
||||||
# Parse metadata if it's a string that looks like JSON
|
# Parse metadata textarea first if it's JSON
|
||||||
if @rule.metadata.is_a?(String) && @rule.metadata.starts_with?('{')
|
if @rule.metadata.is_a?(String) && @rule.metadata.present? && @rule.metadata.starts_with?('{')
|
||||||
begin
|
begin
|
||||||
@rule.metadata = JSON.parse(@rule.metadata)
|
@rule.metadata = JSON.parse(@rule.metadata)
|
||||||
rescue JSON::ParserError
|
rescue JSON::ParserError
|
||||||
# Keep as string if not valid JSON
|
# Keep as string if not valid JSON - will be caught by validation
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Ensure metadata is a hash
|
||||||
|
@rule.metadata = {} unless @rule.metadata.is_a?(Hash)
|
||||||
|
|
||||||
# Handle expires_at parsing for text input
|
# Handle expires_at parsing for text input
|
||||||
if params.dig(:rule, :expires_at).present?
|
if params.dig(:rule, :expires_at).present?
|
||||||
expires_at_str = params[:rule][:expires_at].strip
|
expires_at_str = params[:rule][:expires_at].strip
|
||||||
|
|||||||
36
app/javascript/controllers/rule_form_controller.js
Normal file
36
app/javascript/controllers/rule_form_controller.js
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
import { Controller } from "@hotwired/stimulus"
|
||||||
|
|
||||||
|
// Stimulus controller for the rule form: reveals the action-specific
// "add header" section and the optional expiration field.
export default class RuleFormController extends Controller {
  static targets = ["actionSelect", "addHeaderSection", "expirationCheckbox", "expirationField"]

  // Sync the action-specific sections with the initially selected action.
  connect() {
    this.updateActionSections()
  }

  // The add-header section is visible only while 'add_header' is selected.
  updateActionSections() {
    const showAddHeader = this.actionSelectTarget.value === 'add_header'
    this.addHeaderSectionTarget.classList.toggle('hidden', !showAddHeader)
  }

  // Show the expiration field while the checkbox is ticked; when it is
  // unticked, hide the field and clear any datetime already entered.
  toggleExpiration() {
    const expirationEnabled = this.expirationCheckboxTarget.checked
    const field = this.expirationFieldTarget

    field.classList.toggle('hidden', !expirationEnabled)
    if (expirationEnabled) return

    const datetimeInput = field.querySelector('input[type="datetime-local"]')
    if (datetimeInput) {
      datetimeInput.value = ''
    }
  }
}
|
||||||
52
app/jobs/cleanup_old_events_job.rb
Normal file
52
app/jobs/cleanup_old_events_job.rb
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# CleanupOldEventsJob - Removes events older than the configured retention period
|
||||||
|
#
|
||||||
|
# This job runs periodically (hourly) to clean up old events based on the
|
||||||
|
# event_retention_days setting. This helps keep the database size manageable
|
||||||
|
# and improves query performance.
|
||||||
|
#
|
||||||
|
# The retention period is configurable via the 'event_retention_days' setting
|
||||||
|
# (default: 90 days). This allows administrators to balance between historical
|
||||||
|
# data retention and database performance.
|
||||||
|
#
|
||||||
|
# Schedule: Every hour (configured in config/recurring.yml)
|
||||||
|
class CleanupOldEventsJob < ApplicationJob
  queue_as :background

  # Deletes every Event whose timestamp is older than
  # Setting.event_retention_days days.
  #
  # Returns the number of deleted events, or 0 when retention is disabled
  # (setting is zero or negative) or no event is old enough.
  def perform
    retention_days = Setting.event_retention_days

    # Zero or a negative value switches the cleanup off entirely.
    if retention_days <= 0
      Rails.logger.info "CleanupOldEventsJob: Event retention disabled (retention_days: #{retention_days})"
      return 0
    end

    cutoff_date = retention_days.days.ago
    expired_events = Event.where('timestamp < ?', cutoff_date)
    expected_total = expired_events.count

    if expected_total.zero?
      Rails.logger.info "CleanupOldEventsJob: No events older than #{retention_days} days found"
      return 0
    end

    Rails.logger.info "CleanupOldEventsJob: Deleting #{expected_total} events older than #{retention_days} days (before #{cutoff_date})"

    removed_total = purge_in_batches(expired_events, expected_total)

    Rails.logger.info "CleanupOldEventsJob: Successfully deleted #{removed_total} events"
    removed_total
  end

  private

  # Deletes the scope in batches of 10,000 rows to avoid long-running
  # transactions, logging progress after each batch.
  # Returns the total number of rows deleted.
  def purge_in_batches(scope, expected_total)
    removed_total = 0

    scope.in_batches(of: 10_000) do |batch|
      removed_in_batch = batch.delete_all
      removed_total += removed_in_batch
      Rails.logger.info "CleanupOldEventsJob: Deleted batch of #{removed_in_batch} events (total: #{removed_total}/#{expected_total})"
    end

    removed_total
  end
end
|
||||||
@@ -15,31 +15,14 @@ class FetchIpapiDataJob < ApplicationJob
|
|||||||
ipapi_data = Ipapi.lookup(sample_ip)
|
ipapi_data = Ipapi.lookup(sample_ip)
|
||||||
|
|
||||||
if ipapi_data.present? && !ipapi_data.key?('error')
|
if ipapi_data.present? && !ipapi_data.key?('error')
|
||||||
# Check if IPAPI returned a different route than our tracking network
|
# Process IPAPI data and create network ranges
|
||||||
ipapi_route = ipapi_data.dig('asn', 'route')
|
result = Ipapi.process_ipapi_data(ipapi_data, tracking_network)
|
||||||
target_network = tracking_network
|
|
||||||
|
|
||||||
if ipapi_route.present? && ipapi_route != tracking_network.cidr
|
# Mark the tracking network as having been queried
|
||||||
# IPAPI returned a different CIDR - find or create that network range
|
# Use the broadest CIDR returned for deduplication
|
||||||
Rails.logger.info "IPAPI returned different route: #{ipapi_route} (requested: #{tracking_network.cidr})"
|
tracking_network.mark_ipapi_queried!(result[:broadest_cidr])
|
||||||
|
|
||||||
target_network = NetworkRange.find_or_create_by(network: ipapi_route) do |nr|
|
Rails.logger.info "Successfully fetched IPAPI data for #{tracking_network.cidr} (created #{result[:networks].length} networks)"
|
||||||
nr.source = 'api_imported'
|
|
||||||
nr.creation_reason = "Created from IPAPI lookup for #{tracking_network.cidr}"
|
|
||||||
end
|
|
||||||
|
|
||||||
Rails.logger.info "Storing IPAPI data on correct network: #{target_network.cidr}"
|
|
||||||
end
|
|
||||||
|
|
||||||
# Store data on the target network (wherever IPAPI said it belongs)
|
|
||||||
target_network.set_network_data(:ipapi, ipapi_data)
|
|
||||||
target_network.last_api_fetch = Time.current
|
|
||||||
target_network.save!
|
|
||||||
|
|
||||||
# Mark the tracking network as having been queried, with the CIDR that was returned
|
|
||||||
tracking_network.mark_ipapi_queried!(target_network.cidr)
|
|
||||||
|
|
||||||
Rails.logger.info "Successfully fetched IPAPI data for #{tracking_network.cidr} (stored on #{target_network.cidr})"
|
|
||||||
|
|
||||||
# Broadcast to the tracking network
|
# Broadcast to the tracking network
|
||||||
broadcast_ipapi_update(tracking_network, ipapi_data)
|
broadcast_ipapi_update(tracking_network, ipapi_data)
|
||||||
|
|||||||
26
app/jobs/import_all_bot_network_ranges_job.rb
Normal file
26
app/jobs/import_all_bot_network_ranges_job.rb
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# ImportAllBotNetworkRangesJob - Background job for importing from all bot sources
|
||||||
|
class ImportAllBotNetworkRangesJob < ApplicationJob
  queue_as :default

  # Imports from every bot source through
  # BotNetworkRangeImporter.import_all_sources, logs the returned summary
  # and broadcasts it on the "bot_imports" stream.
  #
  # options - Hash forwarded unchanged to the importer.
  def perform(options = {})
    Rails.logger.info "Starting batch import of all bot network ranges"

    results = BotNetworkRangeImporter.import_all_sources(options)
    Rails.logger.info "Batch import completed. Summary: #{results}"

    # Completion summary pushed to connected clients.
    summary = {
      type: 'batch_summary',
      status: 'completed',
      results: results,
      message: "Batch import completed for all sources"
    }
    ActionCable.server.broadcast("bot_imports", summary)
  end
end
|
||||||
47
app/jobs/import_bot_network_ranges_job.rb
Normal file
47
app/jobs/import_bot_network_ranges_job.rb
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# ImportBotNetworkRangesJob - Background job for importing bot network ranges
|
||||||
|
#
|
||||||
|
# Imports network ranges from official bot provider sources.
|
||||||
|
# Runs asynchronously to avoid blocking the web interface.
|
||||||
|
class ImportBotNetworkRangesJob < ApplicationJob
  queue_as :default

  # Imports the ranges of one bot source via
  # BotNetworkRangeImporter.import_from_source and broadcasts the outcome on
  # the "bot_imports" stream.
  #
  # source_key - identifier of the source to import.
  # options    - Hash forwarded unchanged to the importer.
  #
  # On failure the error is logged and broadcast, then re-raised.
  def perform(source_key, options = {})
    Rails.logger.info "Starting bot network range import for source: #{source_key}"

    begin
      result = BotNetworkRangeImporter.import_from_source(source_key, options)
      imported = result[:imported]
      success_message = "Successfully imported #{imported} ranges from #{result[:source]}"

      Rails.logger.info success_message
      broadcast_status(
        source: source_key,
        status: 'completed',
        imported: imported,
        message: success_message
      )
    rescue => e
      Rails.logger.error "Bot network range import failed for #{source_key}: #{e.message}"

      broadcast_status(
        source: source_key,
        status: 'error',
        error: e.message,
        message: "Failed to import from #{source_key}: #{e.message}"
      )

      raise
    end
  end

  private

  # Pushes a status payload to clients listening on "bot_imports".
  def broadcast_status(payload)
    ActionCable.server.broadcast("bot_imports", payload)
  end
end
|
||||||
@@ -53,16 +53,15 @@ class ProcessWafEventJob < ApplicationJob
|
|||||||
# Queue IPAPI enrichment based on /24 tracking
|
# Queue IPAPI enrichment based on /24 tracking
|
||||||
# The tracking network is the /24 that stores ipapi_queried_at
|
# The tracking network is the /24 that stores ipapi_queried_at
|
||||||
if NetworkRange.should_fetch_ipapi_for_ip?(event.ip_address)
|
if NetworkRange.should_fetch_ipapi_for_ip?(event.ip_address)
|
||||||
# Use tracking network for fetch status to avoid race conditions
|
# Atomically mark as fetching - this prevents duplicate jobs via database lock
|
||||||
if tracking_network.is_fetching_api_data?(:ipapi)
|
if tracking_network.mark_as_fetching_api_data!(:ipapi)
|
||||||
Rails.logger.info "Skipping IPAPI fetch for #{tracking_network.cidr} - already being fetched"
|
|
||||||
else
|
|
||||||
tracking_network.mark_as_fetching_api_data!(:ipapi)
|
|
||||||
Rails.logger.info "Queueing IPAPI fetch for IP #{event.ip_address} (tracking network: #{tracking_network.cidr})"
|
Rails.logger.info "Queueing IPAPI fetch for IP #{event.ip_address} (tracking network: #{tracking_network.cidr})"
|
||||||
FetchIpapiDataJob.perform_later(network_range_id: tracking_network.id)
|
FetchIpapiDataJob.perform_later(network_range_id: tracking_network.id)
|
||||||
|
else
|
||||||
|
Rails.logger.info "Skipping IPAPI fetch for #{tracking_network.cidr} - another job already started"
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
Rails.logger.debug "Skipping IPAPI fetch for IP #{event.ip_address} - already queried recently"
|
Rails.logger.debug "Skipping IPAPI fetch for IP #{event.ip_address} - already queried or being fetched"
|
||||||
end
|
end
|
||||||
|
|
||||||
# Evaluate WAF policies inline if needed (lazy evaluation)
|
# Evaluate WAF policies inline if needed (lazy evaluation)
|
||||||
|
|||||||
89
app/jobs/sync_events_to_duckdb_job.rb
Normal file
89
app/jobs/sync_events_to_duckdb_job.rb
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# Background job to sync events from PostgreSQL to DuckDB
|
||||||
|
# Runs every 5 minutes to keep analytics database up-to-date
|
||||||
|
# Uses watermark tracking to only sync new events
|
||||||
|
class SyncEventsToDuckdbJob < ApplicationJob
  queue_as :default

  # Key for storing last sync timestamp in Rails cache
  WATERMARK_CACHE_KEY = "duckdb_last_sync_time"
  # Lifetime of the cached watermark entry
  WATERMARK_TTL = 1.week

  # Overlap window to catch late-arriving events
  SYNC_OVERLAP = 1.minute

  # Syncs events from the computed start time into DuckDB and, when at least
  # one event was synced, advances the cached watermark.
  #
  # Any StandardError is logged with its backtrace and re-raised so the job
  # is reported as failed.
  def perform
    service = AnalyticsDuckdbService.instance

    # Determine where to start syncing
    from_timestamp = determine_sync_start_time(service)

    # Capture the watermark BEFORE the sync starts. Taking it afterwards
    # would let events committed while a long sync is running (longer than
    # SYNC_OVERLAP) fall before the next run's start time and never be synced.
    sync_started_at = Time.current

    Rails.logger.info "[DuckDB Sync] Starting sync from #{from_timestamp}"

    # Sync new events using PostgreSQL cursor + DuckDB Appender
    # (setup_schema is called internally within sync_new_events)
    count = service.sync_new_events(from_timestamp)

    # Update watermark if we synced any events
    if count > 0
      update_last_sync_time(sync_started_at)
      Rails.logger.info "[DuckDB Sync] Successfully synced #{count} events"
    else
      Rails.logger.info "[DuckDB Sync] No new events to sync"
    end
  rescue StandardError => e
    Rails.logger.error "[DuckDB Sync] Job failed: #{e.message}"
    Rails.logger.error e.backtrace.join("\n")
    raise # Re-raise to mark job as failed in Solid Queue
  end

  private

  # Determine timestamp to start syncing from.
  # Strategy:
  # 1. First run (DuckDB empty): sync from oldest PostgreSQL event
  # 2. Subsequent runs: sync from last watermark with overlap
  def determine_sync_start_time(service)
    oldest_duckdb = service.oldest_event_timestamp

    if oldest_duckdb.nil?
      first_sync_start_time
    else
      incremental_sync_start_time(service, oldest_duckdb)
    end
  end

  # Start time for the very first sync (DuckDB has no events yet): the oldest
  # PostgreSQL event timestamp, or one day ago when PostgreSQL is empty too.
  def first_sync_start_time
    oldest_pg = Event.minimum(:timestamp)

    if oldest_pg.nil?
      Rails.logger.warn "[DuckDB Sync] No events found in PostgreSQL"
      1.day.ago # Default to recent window
    else
      Rails.logger.info "[DuckDB Sync] First sync - starting from oldest event: #{oldest_pg}"
      oldest_pg
    end
  end

  # Start time when DuckDB already holds data: the cached watermark minus the
  # overlap, or (when the cache entry is missing, e.g. expired or restarted)
  # the newest DuckDB event minus the overlap, falling back to the oldest
  # DuckDB event.
  def incremental_sync_start_time(service, oldest_duckdb)
    last_sync = Rails.cache.read(WATERMARK_CACHE_KEY)

    if last_sync.nil?
      newest_duckdb = service.newest_event_timestamp
      start_time = newest_duckdb ? newest_duckdb - SYNC_OVERLAP : oldest_duckdb
      Rails.logger.info "[DuckDB Sync] Watermark not found, using newest DuckDB event: #{start_time}"
      start_time
    else
      # Normal case: use watermark with overlap to catch late arrivals
      start_time = last_sync - SYNC_OVERLAP
      Rails.logger.debug "[DuckDB Sync] Using watermark: #{last_sync} (with #{SYNC_OVERLAP}s overlap)"
      start_time
    end
  end

  # Update last sync watermark in cache.
  #
  # watermark - time to record; defaults to the current time so existing
  #             zero-argument calls keep working.
  def update_last_sync_time(watermark = Time.current)
    Rails.cache.write(WATERMARK_CACHE_KEY, watermark, expires_in: WATERMARK_TTL)
    Rails.logger.debug "[DuckDB Sync] Updated watermark to #{watermark}"
  end
end
|
||||||
@@ -9,11 +9,24 @@ class Event < ApplicationRecord
|
|||||||
has_one :waf_policy, through: :rule
|
has_one :waf_policy, through: :rule
|
||||||
|
|
||||||
# Enums for fixed value sets
|
# Enums for fixed value sets
|
||||||
|
# Canonical WAF action order - aligned with Rule and Agent models
|
||||||
|
#
|
||||||
|
# IMPORTANT: These values were swapped to match baffle-agent convention:
|
||||||
|
# - deny: 0 (blocked traffic)
|
||||||
|
# - allow: 1 (allowed traffic)
|
||||||
|
#
|
||||||
|
# When using raw integer values in queries:
|
||||||
|
# - waf_action = 0 -> denied/blocked requests
|
||||||
|
# - waf_action = 1 -> allowed requests
|
||||||
|
# - waf_action = 2 -> redirect requests
|
||||||
|
# - waf_action = 3 -> challenge requests
|
||||||
|
# - waf_action = 4 -> log-only requests
|
||||||
enum :waf_action, {
|
enum :waf_action, {
|
||||||
allow: 0, # allow/pass
|
deny: 0, # deny/block
|
||||||
deny: 1, # deny/block
|
allow: 1, # allow/pass
|
||||||
redirect: 2, # redirect
|
redirect: 2, # redirect
|
||||||
challenge: 3 # challenge (future implementation)
|
challenge: 3, # challenge (CAPTCHA, JS challenge, etc.)
|
||||||
|
log: 4 # log only, no action (monitoring mode)
|
||||||
}, default: :allow, scopes: false
|
}, default: :allow, scopes: false
|
||||||
|
|
||||||
enum :request_method, {
|
enum :request_method, {
|
||||||
@@ -42,7 +55,7 @@ class Event < ApplicationRecord
|
|||||||
scope :by_waf_action, ->(waf_action) { where(waf_action: waf_action) }
|
scope :by_waf_action, ->(waf_action) { where(waf_action: waf_action) }
|
||||||
scope :blocked, -> { where(waf_action: :deny) }
|
scope :blocked, -> { where(waf_action: :deny) }
|
||||||
scope :allowed, -> { where(waf_action: :allow) }
|
scope :allowed, -> { where(waf_action: :allow) }
|
||||||
scope :rate_limited, -> { where(waf_action: 'rate_limit') }
|
scope :logged, -> { where(waf_action: :log) }
|
||||||
|
|
||||||
# Tag-based filtering scopes using PostgreSQL array operators
|
# Tag-based filtering scopes using PostgreSQL array operators
|
||||||
scope :with_tag, ->(tag) { where("tags @> ARRAY[?]", tag.to_s) }
|
scope :with_tag, ->(tag) { where("tags @> ARRAY[?]", tag.to_s) }
|
||||||
@@ -92,6 +105,11 @@ class Event < ApplicationRecord
|
|||||||
joins(:network_range).where("network_ranges.network = ?", cidr)
|
joins(:network_range).where("network_ranges.network = ?", cidr)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Bot filtering scopes
|
||||||
|
scope :bots, -> { where(is_bot: true) }
|
||||||
|
scope :humans, -> { where(is_bot: false) }
|
||||||
|
scope :exclude_bots, -> { where(is_bot: false) }
|
||||||
|
|
||||||
# Add association for the optional network_range_id
|
# Add association for the optional network_range_id
|
||||||
belongs_to :network_range, optional: true
|
belongs_to :network_range, optional: true
|
||||||
|
|
||||||
@@ -178,6 +196,9 @@ class Event < ApplicationRecord
|
|||||||
# Populate network intelligence from IP address
|
# Populate network intelligence from IP address
|
||||||
before_save :populate_network_intelligence, if: :should_populate_network_intelligence?
|
before_save :populate_network_intelligence, if: :should_populate_network_intelligence?
|
||||||
|
|
||||||
|
# Detect bot traffic using user agent and network intelligence
|
||||||
|
before_save :detect_bot_traffic, if: :should_detect_bot?
|
||||||
|
|
||||||
# Backfill network intelligence for all events
|
# Backfill network intelligence for all events
|
||||||
def self.backfill_network_intelligence!(batch_size: 10_000)
|
def self.backfill_network_intelligence!(batch_size: 10_000)
|
||||||
total = where(country: nil).count
|
total = where(country: nil).count
|
||||||
@@ -205,8 +226,8 @@ class Event < ApplicationRecord
|
|||||||
# Normalize headers in payload during import phase
|
# Normalize headers in payload during import phase
|
||||||
normalized_payload = normalize_payload_headers(payload)
|
normalized_payload = normalize_payload_headers(payload)
|
||||||
|
|
||||||
# Create the WAF request event
|
# Create the WAF request event with agent-provided tags
|
||||||
create!(
|
event = create!(
|
||||||
request_id: request_id,
|
request_id: request_id,
|
||||||
timestamp: parse_timestamp(normalized_payload["timestamp"]),
|
timestamp: parse_timestamp(normalized_payload["timestamp"]),
|
||||||
payload: normalized_payload,
|
payload: normalized_payload,
|
||||||
@@ -229,11 +250,18 @@ class Event < ApplicationRecord
|
|||||||
server_name: normalized_payload["server_name"],
|
server_name: normalized_payload["server_name"],
|
||||||
environment: normalized_payload["environment"],
|
environment: normalized_payload["environment"],
|
||||||
|
|
||||||
|
# Tags: start with agent-provided tags only
|
||||||
|
tags: normalized_payload["tags"] || [],
|
||||||
|
|
||||||
# WAF agent info
|
# WAF agent info
|
||||||
agent_version: normalized_payload.dig("agent", "version"),
|
agent_version: normalized_payload.dig("agent", "version"),
|
||||||
agent_name: normalized_payload.dig("agent", "name")
|
agent_name: normalized_payload.dig("agent", "name")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Apply rule tags using EventTagger service
|
||||||
|
EventTagger.tag_event(event)
|
||||||
|
|
||||||
|
event
|
||||||
end
|
end
|
||||||
|
|
||||||
# Normalize headers in payload to lower case during import phase
|
# Normalize headers in payload to lower case during import phase
|
||||||
@@ -326,7 +354,10 @@ class Event < ApplicationRecord
|
|||||||
|
|
||||||
def tags
|
def tags
|
||||||
# Use the dedicated tags column (array), fallback to payload during transition
|
# Use the dedicated tags column (array), fallback to payload during transition
|
||||||
super.presence || (payload&.dig("tags") || [])
|
# Ensure we always return an Array, even if payload has malformed data (e.g., {} instead of [])
|
||||||
|
result = super.presence || payload&.dig("tags")
|
||||||
|
return [] if result.nil?
|
||||||
|
result.is_a?(Array) ? result : []
|
||||||
end
|
end
|
||||||
|
|
||||||
def headers
|
def headers
|
||||||
@@ -339,15 +370,15 @@ class Event < ApplicationRecord
|
|||||||
end
|
end
|
||||||
|
|
||||||
def blocked?
|
def blocked?
|
||||||
waf_action.in?(['block', 'deny'])
|
waf_action == 'deny' # deny = 0
|
||||||
end
|
end
|
||||||
|
|
||||||
def allowed?
|
def allowed?
|
||||||
waf_action.in?(['allow', 'pass'])
|
waf_action == 'allow' # allow = 1
|
||||||
end
|
end
|
||||||
|
|
||||||
def rate_limited?
|
def logged?
|
||||||
waf_action == 'rate_limit'
|
waf_action == 'log'
|
||||||
end
|
end
|
||||||
|
|
||||||
def challenged?
|
def challenged?
|
||||||
@@ -680,10 +711,88 @@ class Event < ApplicationRecord
|
|||||||
self.server_name = payload["server_name"]
|
self.server_name = payload["server_name"]
|
||||||
self.environment = payload["environment"]
|
self.environment = payload["environment"]
|
||||||
|
|
||||||
|
|
||||||
# Extract agent info
|
# Extract agent info
|
||||||
agent_data = payload.dig("agent") || {}
|
agent_data = payload.dig("agent") || {}
|
||||||
self.agent_version = agent_data["version"]
|
self.agent_version = agent_data["version"]
|
||||||
self.agent_name = agent_data["name"]
|
self.agent_name = agent_data["name"]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Guard for the bot-detection callback: true when the event has a user agent
# or a linked network range id to inspect.
def should_detect_bot?
  return true if user_agent.present?

  network_range_id.present?
end
|
||||||
|
|
||||||
|
# Stores the result of bot_detected? in is_bot. If detection raises, the
# error is logged and the event is treated as non-bot.
def detect_bot_traffic
  begin
    self.is_bot = bot_detected?
  rescue => e
    Rails.logger.error "Failed to detect bot for event #{id}: #{e.message}"
    self.is_bot = false # Default to non-bot on error
  end
end
|
||||||
|
|
||||||
|
# Decides whether this event looks like bot traffic, tagging the event as a
# side effect (via add_tag) for whichever signal matched first.
#
# Signals, checked in order; the first match returns true:
#   1. DeviceDetector flags the user agent as a bot -> tag "bot:<name>"
#   2. The linked NetworkRange came from a 'bot_import_*' source, or the
#      event's company contains a known bot-provider name -> "bot:"/"network:" tags
#   3. Datacenter traffic whose user agent contains a generic bot keyword
#      -> tags "bot:datacenter" and "datacenter:true"
#
# Returns false when no signal matches.
def bot_detected?
  # Signal 1: user agent inspection. Failures inside this section are only
  # logged at debug level so the remaining signals still get a chance.
  if user_agent.present?
    begin
      ua_detector = DeviceDetector.new(user_agent)
      if ua_detector.bot?
        detected_name = ua_detector.bot_name&.downcase&.gsub(/\s+/, '_')
        add_tag("bot:#{detected_name || 'unknown'}")
        return true
      end
    rescue => e
      Rails.logger.debug "DeviceDetector failed for user agent: #{e.message}"
    end
  end

  # Signal 2: network range from known bot sources
  matched_range = network_range_id.present? && NetworkRange.find_by(id: network_range_id)
  if matched_range
    # Range imported from an official bot source,
    # e.g. 'bot_import_googlebot' -> 'googlebot'
    if matched_range.source&.start_with?('bot_import_')
      imported_bot = matched_range.source.sub('bot_import_', '')
      add_tag("bot:#{imported_bot}")
      if matched_range.company.present?
        add_tag("network:#{matched_range.company&.downcase&.gsub(/\s+/, '_')}")
      end
      return true
    end

    # Otherwise match the event's own company against known bot providers
    provider_markers = ['googlebot', 'google bot', 'amazon', 'aws', 'openai',
                        'anthropic', 'cloudflare', 'microsoft', 'facebook',
                        'meta', 'apple', 'duckduckgo']
    normalized_company = company&.downcase
    if normalized_company && provider_markers.any? { |marker| normalized_company.include?(marker) }
      company_slug = normalized_company.gsub(/\s+/, '_')
      add_tag("bot:#{company_slug}")
      add_tag("network:#{company_slug}")
      return true
    end
  end

  # Signal 3: weaker signal - datacenter traffic only counts as a bot when
  # the user agent also contains a generic bot keyword
  if is_datacenter && user_agent.present?
    lowered_agent = user_agent.downcase
    generic_markers = ['bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python', 'go-http-client']
    if generic_markers.any? { |marker| lowered_agent.include?(marker) }
      add_tag("bot:datacenter")
      add_tag("datacenter:true")
      return true
    end
  end

  # Default: not a bot
  false
end
|
||||||
end
|
end
|
||||||
|
|||||||
628
app/models/event_ddb.rb
Normal file
628
app/models/event_ddb.rb
Normal file
@@ -0,0 +1,628 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require 'ostruct'
|
||||||
|
|
||||||
|
# EventDdb - DuckDB-backed analytics queries for events
|
||||||
|
# Provides an ActiveRecord-like interface for querying DuckDB events table
|
||||||
|
# Falls back to PostgreSQL Event model if DuckDB is unavailable
|
||||||
|
class EventDdb
|
||||||
|
class << self
|
||||||
|
# Get DuckDB service
|
||||||
|
# Accessor for the shared AnalyticsDuckdbService instance that every query
# helper in this class goes through.
def service
  AnalyticsDuckdbService.instance
end
|
||||||
|
|
||||||
|
# Total events since timestamp
|
||||||
|
# Counts events with timestamp >= start_time.
#
# The count is read from the first column of the first result row and
# defaults to 0 when there is no row or value. The method returns whatever
# service.with_connection returns for the block (presumably the block's
# value). On any StandardError the error is logged and nil is returned so
# the caller can fall back to PostgreSQL.
def count_since(start_time)
  sql = "SELECT COUNT(*) as count FROM events WHERE timestamp >= ?"

  service.with_connection do |conn|
    first_row = conn.query(sql, start_time).first
    (first_row && first_row.first) || 0
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in count_since: #{e.message}"
  nil # Fallback to PostgreSQL
end
|
||||||
|
|
||||||
|
# Event breakdown by WAF action
|
||||||
|
# Event counts grouped by WAF action for events with timestamp >= start_time.
#
# Builds a hash of action name => count, similar to what ActiveRecord's
# .group.count returns. DuckDB stores waf_action as an integer, mapped here
# as 0=deny, 1=allow, 2=redirect, 3=challenge, 4=log. Any other value
# (including NULL) is reported under "unknown"; counts of several unmapped
# values are summed rather than overwriting one another.
#
# The method returns whatever service.with_connection returns for the block
# (presumably that hash). On any StandardError the error is logged and nil
# is returned.
def breakdown_by_action(start_time)
  service.with_connection do |conn|
    result = conn.query(<<~SQL, start_time)
      SELECT waf_action, COUNT(*) as count
      FROM events
      WHERE timestamp >= ?
      GROUP BY waf_action
    SQL

    action_map = { 0 => "deny", 1 => "allow", 2 => "redirect", 3 => "challenge", 4 => "log" }

    # Accumulate instead of to_h: distinct unmapped values share the
    # "unknown" label, and to_h would keep only the last one's count.
    result.to_a.each_with_object({}) do |row, totals|
      label = action_map[row[0]] || "unknown"
      totals[label] = totals.fetch(label, 0) + row[1]
    end
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in breakdown_by_action: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Top countries with event counts
|
||||||
|
# Countries with the most events since start_time (NULL countries excluded),
# highest count first, capped at limit rows.
#
# Each result row is turned into a [country, count] pair. On any
# StandardError the error is logged and nil is returned.
def top_countries(start_time, limit = 10)
  sql = <<~SQL
    SELECT country, COUNT(*) as count
    FROM events
    WHERE timestamp >= ? AND country IS NOT NULL
    GROUP BY country
    ORDER BY count DESC
    LIMIT ?
  SQL

  service.with_connection do |conn|
    rows = conn.query(sql, start_time, limit).to_a
    rows.map { |country, count| [country, count] }
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in top_countries: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Top blocked IPs
|
||||||
|
# IP addresses with the most events where waf_action = 0 since start_time,
# highest count first, capped at limit rows.
#
# Each result row is turned into an [ip_address, count] pair. On any
# StandardError the error is logged and nil is returned.
def top_blocked_ips(start_time, limit = 10)
  sql = <<~SQL
    SELECT ip_address, COUNT(*) as count
    FROM events
    WHERE timestamp >= ? AND waf_action = 0
    GROUP BY ip_address
    ORDER BY count DESC
    LIMIT ?
  SQL

  service.with_connection do |conn|
    rows = conn.query(sql, start_time, limit).to_a
    rows.map { |ip_address, count| [ip_address, count] }
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in top_blocked_ips: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Hourly timeline aggregation
|
||||||
|
# Event counts per hour for start_time <= timestamp < end_time.
#
# Rows are grouped by DATE_TRUNC('hour', timestamp), ordered by hour, and
# converted into a hash of hour => count. On any StandardError the error is
# logged and nil is returned.
def hourly_timeline(start_time, end_time)
  sql = <<~SQL
    SELECT
    DATE_TRUNC('hour', timestamp) as hour,
    COUNT(*) as count
    FROM events
    WHERE timestamp >= ? AND timestamp < ?
    GROUP BY hour
    ORDER BY hour
  SQL

  service.with_connection do |conn|
    rows = conn.query(sql, start_time, end_time).to_a
    rows.to_h { |hour, count| [hour, count] }
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in hourly_timeline: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Top networks by traffic volume
|
||||||
|
# Returns array of arrays: [network_range_id, event_count, unique_ips]
|
||||||
|
# Network ranges with the most events since start_time (events without a
# network_range_id excluded), highest event count first, capped at limit.
#
# Rows are returned as-is from the query:
# [network_range_id, event_count, unique_ips]. On any StandardError the
# error is logged and nil is returned.
def top_networks(start_time, limit = 50)
  sql = <<~SQL
    SELECT
    network_range_id,
    COUNT(*) as event_count,
    COUNT(DISTINCT ip_address) as unique_ips
    FROM events
    WHERE timestamp >= ? AND network_range_id IS NOT NULL
    GROUP BY network_range_id
    ORDER BY event_count DESC
    LIMIT ?
  SQL

  service.with_connection do |conn|
    conn.query(sql, start_time, limit).to_a
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in top_networks: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Top companies
|
||||||
|
# Returns array of OpenStruct objects with: company, event_count, unique_ips, network_count
|
||||||
|
# Companies with the most events since start_time (NULL companies excluded),
# highest event count first, capped at limit.
#
# Each row becomes an OpenStruct exposing company, event_count, unique_ips
# and network_count. On any StandardError the error is logged and nil is
# returned.
def top_companies(start_time, limit = 20)
  sql = <<~SQL
    SELECT
    company,
    COUNT(*) as event_count,
    COUNT(DISTINCT ip_address) as unique_ips,
    COUNT(DISTINCT network_range_id) as network_count
    FROM events
    WHERE timestamp >= ? AND company IS NOT NULL
    GROUP BY company
    ORDER BY event_count DESC
    LIMIT ?
  SQL

  service.with_connection do |conn|
    rows = conn.query(sql, start_time, limit).to_a

    # Wrap positional columns so callers can use attribute access
    rows.map do |company, event_count, unique_ips, network_count|
      OpenStruct.new(
        company: company,
        event_count: event_count,
        unique_ips: unique_ips,
        network_count: network_count
      )
    end
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in top_companies: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Top ASNs
|
||||||
|
# Returns array of OpenStruct objects with: asn, asn_org, event_count, unique_ips, network_count
|
||||||
|
# ASNs with the most events since start_time (NULL ASNs excluded), grouped
# by asn and asn_org, highest event count first, capped at limit.
#
# Each row becomes an OpenStruct exposing asn, asn_org, event_count,
# unique_ips and network_count. On any StandardError the error is logged and
# nil is returned.
def top_asns(start_time, limit = 15)
  sql = <<~SQL
    SELECT
    asn,
    asn_org,
    COUNT(*) as event_count,
    COUNT(DISTINCT ip_address) as unique_ips,
    COUNT(DISTINCT network_range_id) as network_count
    FROM events
    WHERE timestamp >= ? AND asn IS NOT NULL
    GROUP BY asn, asn_org
    ORDER BY event_count DESC
    LIMIT ?
  SQL

  service.with_connection do |conn|
    rows = conn.query(sql, start_time, limit).to_a

    # Wrap positional columns so callers can use attribute access
    rows.map do |asn, asn_org, event_count, unique_ips, network_count|
      OpenStruct.new(
        asn: asn,
        asn_org: asn_org,
        event_count: event_count,
        unique_ips: unique_ips,
        network_count: network_count
      )
    end
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in top_asns: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Network type breakdown (datacenter, VPN, proxy, standard)
|
||||||
|
# Returns hash with network_type as key and hash of stats as value
|
||||||
|
# Event statistics per network type since start_time.
#
# The SQL CASE assigns each event exactly one type, checked in this order:
# 'datacenter', 'vpn', 'proxy', otherwise 'standard'. The rows are converted
# into a hash of
#   network_type => { "event_count", "unique_ips", "network_count" }
# (string keys). On any StandardError the error is logged and nil is
# returned.
def network_type_breakdown(start_time)
  sql = <<~SQL
    SELECT
    CASE
    WHEN is_datacenter THEN 'datacenter'
    WHEN is_vpn THEN 'vpn'
    WHEN is_proxy THEN 'proxy'
    ELSE 'standard'
    END as network_type,
    COUNT(*) as event_count,
    COUNT(DISTINCT ip_address) as unique_ips,
    COUNT(DISTINCT network_range_id) as network_count
    FROM events
    WHERE timestamp >= ?
    GROUP BY network_type
  SQL

  service.with_connection do |conn|
    rows = conn.query(sql, start_time).to_a

    rows.to_h do |network_type, event_count, unique_ips, network_count|
      type_stats = {
        "event_count" => event_count,
        "unique_ips" => unique_ips,
        "network_count" => network_count
      }
      [network_type, type_stats]
    end
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in network_type_breakdown: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Top countries with detailed stats (event count and unique IPs)
|
||||||
|
# Returns array of OpenStruct objects with: country, event_count, unique_ips
|
||||||
|
# Countries with the most events since start_time (NULL countries excluded),
# including the number of distinct IPs, highest event count first, capped
# at limit.
#
# Each row becomes an OpenStruct exposing country, event_count and
# unique_ips. On any StandardError the error is logged and nil is returned.
def top_countries_with_stats(start_time, limit = 15)
  sql = <<~SQL
    SELECT
    country,
    COUNT(*) as event_count,
    COUNT(DISTINCT ip_address) as unique_ips
    FROM events
    WHERE timestamp >= ? AND country IS NOT NULL
    GROUP BY country
    ORDER BY event_count DESC
    LIMIT ?
  SQL

  service.with_connection do |conn|
    rows = conn.query(sql, start_time, limit).to_a

    # Wrap positional columns so callers can use attribute access
    rows.map do |country, event_count, unique_ips|
      OpenStruct.new(
        country: country,
        event_count: event_count,
        unique_ips: unique_ips
      )
    end
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in top_countries_with_stats: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Network type stats with formatted output matching controller expectations
|
||||||
|
# Returns hash with type keys containing label, networks, events, unique_ips, percentage
|
||||||
|
# Per-network-type statistics since start_time, formatted for display.
#
# Produces a hash keyed by 'datacenter', 'vpn', 'proxy' and 'standard' (in
# that order). Each value holds :label, :networks, :events, :unique_ips and
# :percentage (share of all events since start_time, rounded to one
# decimal). Types absent from the breakdown are reported with zeros.
#
# Returns nil when network_type_breakdown returns nil, or when any
# StandardError is raised (after logging it).
#
# NOTE(review): network_type_breakdown is called while this method's own
# with_connection block is still open - confirm nested use is supported.
def network_type_stats(start_time)
  service.with_connection do |conn|
    # Total events, used as the denominator for percentages
    total_row = conn.query("SELECT COUNT(*) as total FROM events WHERE timestamp >= ?", start_time).first
    total_events = (total_row && total_row.first) || 0

    breakdown = network_type_breakdown(start_time)
    return nil unless breakdown

    type_labels = {
      'datacenter' => 'Datacenter',
      'vpn' => 'VPN',
      'proxy' => 'Proxy',
      'standard' => 'Standard'
    }

    type_labels.to_h do |type, label|
      stats = breakdown[type]
      event_count = stats ? stats["event_count"] : 0
      share = total_events > 0 ? ((event_count.to_f / total_events) * 100).round(1) : 0

      formatted = {
        label: label,
        networks: stats ? stats["network_count"] : 0,
        events: event_count,
        unique_ips: stats ? stats["unique_ips"] : 0,
        percentage: share
      }
      [type, formatted]
    end
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in network_type_stats: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Network range traffic statistics
|
||||||
|
# Returns comprehensive stats for a given network range ID(s)
|
||||||
|
# Aggregate traffic statistics for one or more network range ids.
#
# Accepts a single id or a list (normalized with Array()). Produces a hash
# with :total_requests, :unique_ips, :blocked_requests (waf_action = 0) and
# :allowed_requests (waf_action = 1); missing values default to 0.
#
# Returns nil when no ids are given, when the query yields no row, or when
# any StandardError is raised (after logging it).
def network_traffic_stats(network_range_ids)
  network_range_ids = Array(network_range_ids)
  return nil if network_range_ids.empty?

  # One "?" per id for the IN clause; the ids themselves are bound, not interpolated
  placeholders = network_range_ids.map { "?" }.join(", ")
  sql = <<~SQL
    SELECT
    COUNT(*) as total_requests,
    COUNT(DISTINCT ip_address) as unique_ips,
    SUM(CASE WHEN waf_action = 0 THEN 1 ELSE 0 END) as blocked_requests,
    SUM(CASE WHEN waf_action = 1 THEN 1 ELSE 0 END) as allowed_requests
    FROM events
    WHERE network_range_id IN (#{placeholders})
  SQL

  service.with_connection do |conn|
    stats_row = conn.query(sql, *network_range_ids).first
    return nil unless stats_row

    {
      total_requests: stats_row[0] || 0,
      unique_ips: stats_row[1] || 0,
      blocked_requests: stats_row[2] || 0,
      allowed_requests: stats_row[3] || 0
    }
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in network_traffic_stats: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Top paths for network range(s)
|
||||||
|
# Most requested paths for one or more network range ids (NULL paths
# excluded), highest count first, capped at limit.
#
# Accepts a single id or a list (normalized with Array()). Each row becomes
# a [request_path, count] pair. Returns nil when no ids are given or when
# any StandardError is raised (after logging it).
def network_top_paths(network_range_ids, limit = 10)
  network_range_ids = Array(network_range_ids)
  return nil if network_range_ids.empty?

  # One "?" per id for the IN clause; the ids themselves are bound, not interpolated
  placeholders = network_range_ids.map { "?" }.join(", ")
  sql = <<~SQL
    SELECT
    request_path,
    COUNT(*) as count
    FROM events
    WHERE network_range_id IN (#{placeholders})
    AND request_path IS NOT NULL
    GROUP BY request_path
    ORDER BY count DESC
    LIMIT ?
  SQL

  service.with_connection do |conn|
    rows = conn.query(sql, *network_range_ids, limit).to_a
    rows.map { |request_path, count| [request_path, count] }
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in network_top_paths: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Top user agents for network range(s)
|
||||||
|
# Most frequent user agents for one or more network range ids (NULL user
# agents excluded), highest count first, capped at limit.
#
# Accepts a single id or a list (normalized with Array()). Each row becomes
# a [user_agent, count] pair. Returns nil when no ids are given or when any
# StandardError is raised (after logging it).
def network_top_user_agents(network_range_ids, limit = 5)
  network_range_ids = Array(network_range_ids)
  return nil if network_range_ids.empty?

  # One "?" per id for the IN clause; the ids themselves are bound, not interpolated
  placeholders = network_range_ids.map { "?" }.join(", ")
  sql = <<~SQL
    SELECT
    user_agent,
    COUNT(*) as count
    FROM events
    WHERE network_range_id IN (#{placeholders})
    AND user_agent IS NOT NULL
    GROUP BY user_agent
    ORDER BY count DESC
    LIMIT ?
  SQL

  service.with_connection do |conn|
    rows = conn.query(sql, *network_range_ids, limit).to_a
    rows.map { |user_agent, count| [user_agent, count] }
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in network_top_user_agents: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Full user agent tally for network range(s)
|
||||||
|
# Returns hash of user_agent => count for all agents in the network
|
||||||
|
# Full user agent tally for one or more network range ids (NULL user agents
# excluded), with no ordering and no limit.
#
# Accepts a single id or a list (normalized with Array()). Rows are
# converted into a hash of user_agent => count, the same shape Ruby's
# .tally produces. Returns nil when no ids are given or when any
# StandardError is raised (after logging it).
def network_agent_tally(network_range_ids)
  network_range_ids = Array(network_range_ids)
  return nil if network_range_ids.empty?

  # One "?" per id for the IN clause; the ids themselves are bound, not interpolated
  placeholders = network_range_ids.map { "?" }.join(", ")
  sql = <<~SQL
    SELECT
    user_agent,
    COUNT(*) as count
    FROM events
    WHERE network_range_id IN (#{placeholders})
    AND user_agent IS NOT NULL
    GROUP BY user_agent
  SQL

  service.with_connection do |conn|
    rows = conn.query(sql, *network_range_ids).to_a
    rows.to_h { |user_agent, count| [user_agent, count] }
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in network_agent_tally: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Suspicious network activity patterns
|
||||||
|
# Detects high-volume networks, high deny rates, and distributed companies
|
||||||
|
# Collects three "suspicious activity" signals for events at or after start_time:
#
# * high_volume           - network ranges whose event count exceeds 5x the
#                           per-network average
# * high_deny_rate        - network ranges with at least 10 events of which more
#                           than half have waf_action = 0
# * distributed_companies - up to 10 companies seen from more than 5 distinct
#                           ip_address values
#
# @param start_time [Object] lower timestamp bound, bound as a query parameter
# @return [Hash, nil] the three sections described above; nil when any query
#   raises (the error is logged)
def suspicious_patterns(start_time)
  service.with_connection do |conn|
    # Average number of events per network range in the window
    avg_query = conn.query(<<~SQL, start_time)
      SELECT
        AVG(event_count) as avg_events
      FROM (
        SELECT network_range_id, COUNT(*) as event_count
        FROM events
        WHERE timestamp >= ? AND network_range_id IS NOT NULL
        GROUP BY network_range_id
      ) network_stats
    SQL

    # AVG yields no usable value when the window has no rows; treat that as 0.
    avg_events = avg_query.first&.first || 0
    threshold = avg_events * 5

    # Each result is materialized exactly once and the array is reused below,
    # rather than converting the same result object twice.
    high_volume_rows = conn.query(<<~SQL, start_time, threshold).to_a
      SELECT
        network_range_id,
        COUNT(*) as event_count
      FROM events
      WHERE timestamp >= ? AND network_range_id IS NOT NULL
      GROUP BY network_range_id
      HAVING COUNT(*) > ?
      ORDER BY event_count DESC
    SQL

    # High deny rate networks (>50% with waf_action = 0, min 10 requests)
    high_deny_rows = conn.query(<<~SQL, start_time).to_a
      SELECT
        network_range_id,
        SUM(CASE WHEN waf_action = 0 THEN 1 ELSE 0 END) as denied_count,
        COUNT(*) as total_count
      FROM events
      WHERE timestamp >= ? AND network_range_id IS NOT NULL
      GROUP BY network_range_id
      HAVING CAST(SUM(CASE WHEN waf_action = 0 THEN 1 ELSE 0 END) AS FLOAT) / COUNT(*) > 0.5
        AND COUNT(*) >= 10
      ORDER BY denied_count DESC
    SQL

    # Distributed companies (more than 5 unique IPs, i.e. 6 or more)
    company_rows = conn.query(<<~SQL, start_time).to_a
      SELECT
        company,
        COUNT(DISTINCT ip_address) as ip_count
      FROM events
      WHERE timestamp >= ? AND company IS NOT NULL
      GROUP BY company
      HAVING COUNT(DISTINCT ip_address) > 5
      ORDER BY ip_count DESC
      LIMIT 10
    SQL

    {
      high_volume: {
        count: high_volume_rows.length,
        networks: high_volume_rows.map { |row| row[0] } # network_range_id
      },
      high_deny_rate: {
        count: high_deny_rows.length,
        network_ids: high_deny_rows.map { |row| row[0] } # network_range_id
      },
      distributed_companies: company_rows.map do |row|
        {
          company: row[0], # company name
          subnets: row[1]  # distinct ip_address count (key name kept for callers)
        }
      end
    }
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in suspicious_patterns: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Bot traffic analysis - breakdown of bot vs human traffic
|
||||||
|
# Splits traffic at or after start_time into "bot" and "human" buckets.
#
# Rows whose is_bot is NULL are folded into the "human" bucket inside the
# query. Grouping on the raw column could produce separate NULL and false
# groups that both map to the "human" key, so one would silently overwrite the
# other when the rows are turned into a hash.
#
# @param start_time [Object] lower timestamp bound, bound as a query parameter
# @return [Hash, nil] "bot"/"human" => { "event_count" => n, "unique_ips" => n };
#   a bucket is absent when it has no rows; nil when the query raises (logged)
def bot_traffic_breakdown(start_time)
  service.with_connection do |conn|
    result = conn.query(<<~SQL, start_time)
      SELECT
        COALESCE(is_bot, false) as bot_flag,
        COUNT(*) as event_count,
        COUNT(DISTINCT ip_address) as unique_ips
      FROM events
      WHERE timestamp >= ?
      GROUP BY COALESCE(is_bot, false)
    SQL

    # Rows are indexed positionally: [bot_flag, event_count, unique_ips]
    result.to_a.to_h do |row|
      [
        row[0] ? "bot" : "human",
        {
          "event_count" => row[1],
          "unique_ips" => row[2]
        }
      ]
    end
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in bot_traffic_breakdown: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Count human traffic (non-bot) since timestamp
|
||||||
|
# Number of events at or after start_time whose is_bot flag is false.
#
# @param start_time [Object] lower timestamp bound, bound as a query parameter
# @return [Integer, nil] the count (0 when the query yields no value);
#   nil when the query raises (the error is logged)
def human_traffic_count(start_time)
  service.with_connection do |conn|
    rows = conn.query(<<~SQL, start_time)
      SELECT COUNT(*) as count
      FROM events
      WHERE timestamp >= ? AND is_bot = false
    SQL

    first_row = rows.first
    humans = first_row&.first
    humans || 0
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in human_traffic_count: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Count bot traffic since timestamp
|
||||||
|
# Number of events at or after start_time whose is_bot flag is true.
#
# @param start_time [Object] lower timestamp bound, bound as a query parameter
# @return [Integer, nil] the count (0 when the query yields no value);
#   nil when the query raises (the error is logged)
def bot_traffic_count(start_time)
  service.with_connection do |conn|
    rows = conn.query(<<~SQL, start_time)
      SELECT COUNT(*) as count
      FROM events
      WHERE timestamp >= ? AND is_bot = true
    SQL

    first_row = rows.first
    bots = first_row&.first
    bots || 0
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in bot_traffic_count: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Top bot user agents
|
||||||
|
# Most active bot user agents at or after start_time, busiest first.
#
# @param start_time [Object] lower timestamp bound, bound as a query parameter
# @param limit [Integer] maximum number of user agents returned (default 20)
# @return [Array<Hash>, nil] hashes with :user_agent, :event_count and
#   :unique_ips keys; nil when the query raises (the error is logged)
def top_bot_user_agents(start_time, limit = 20)
  sql = <<~SQL
    SELECT
      user_agent,
      COUNT(*) as event_count,
      COUNT(DISTINCT ip_address) as unique_ips
    FROM events
    WHERE timestamp >= ? AND is_bot = true AND user_agent IS NOT NULL
    GROUP BY user_agent
    ORDER BY event_count DESC
    LIMIT ?
  SQL

  service.with_connection do |conn|
    rows = conn.query(sql, start_time, limit).to_a

    # Rows are indexed positionally in SELECT order.
    rows.map do |row|
      agent = row[0]
      events = row[1]
      ips = row[2]
      { user_agent: agent, event_count: events, unique_ips: ips }
    end
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in top_bot_user_agents: #{e.message}"
  nil
end
|
||||||
|
|
||||||
|
# Bot traffic timeline (hourly breakdown)
|
||||||
|
# Hour-by-hour bot vs human event counts for start_time <= timestamp < end_time.
#
# @param start_time [Object] inclusive lower bound, bound as a query parameter
# @param end_time [Object] exclusive upper bound, bound as a query parameter
# @return [Hash, nil] truncated hour => { "bot_count", "human_count", "total" },
#   where "total" is bot_count + human_count; nil when the query raises (logged)
def bot_traffic_timeline(start_time, end_time)
  sql = <<~SQL
    SELECT
      DATE_TRUNC('hour', timestamp) as hour,
      SUM(CASE WHEN is_bot = true THEN 1 ELSE 0 END) as bot_count,
      SUM(CASE WHEN is_bot = false THEN 1 ELSE 0 END) as human_count
    FROM events
    WHERE timestamp >= ? AND timestamp < ?
    GROUP BY hour
    ORDER BY hour
  SQL

  service.with_connection do |conn|
    rows = conn.query(sql, start_time, end_time).to_a

    # Rows are indexed positionally: [hour, bot_count, human_count]
    rows.each_with_object({}) do |row, timeline|
      bots = row[1]
      humans = row[2]
      timeline[row[0]] = {
        "bot_count" => bots,
        "human_count" => humans,
        "total" => bots + humans
      }
    end
  end
rescue StandardError => e
  Rails.logger.error "[EventDdb] Error in bot_traffic_timeline: #{e.message}"
  nil
end
|
||||||
|
end
|
||||||
|
end
|
||||||
@@ -7,7 +7,11 @@
|
|||||||
# and classification flags (datacenter, proxy, VPN).
|
# and classification flags (datacenter, proxy, VPN).
|
||||||
class NetworkRange < ApplicationRecord
|
class NetworkRange < ApplicationRecord
|
||||||
# Sources for network range creation
|
# Sources for network range creation
|
||||||
SOURCES = %w[api_imported user_created manual auto_generated inherited geolite_asn geolite_country].freeze
|
SOURCES = %w[api_imported user_created manual auto_generated inherited geolite_asn geolite_country
|
||||||
|
bot_import_amazon_aws bot_import_google bot_import_microsoft_bing bot_import_anthropic
|
||||||
|
bot_import_openai_searchbot bot_import_openai_chatgpt_user bot_import_openai_gptbot
|
||||||
|
bot_import_cloudflare bot_import_facebook bot_import_applebot bot_import_duckduckgo
|
||||||
|
production_import].freeze
|
||||||
|
|
||||||
# Associations
|
# Associations
|
||||||
has_many :rules, dependent: :destroy
|
has_many :rules, dependent: :destroy
|
||||||
@@ -116,19 +120,19 @@ class NetworkRange < ApplicationRecord
|
|||||||
|
|
||||||
# Parent/child relationships
|
# Parent/child relationships
|
||||||
def parent_ranges
|
def parent_ranges
|
||||||
NetworkRange.where("?::inet << network AND masklen(network) < ?", network.to_s, prefix_length)
|
# Find networks that contain this network (less specific / shorter prefix)
|
||||||
.order("masklen(network) DESC")
|
# The << operator implicitly means the containing network has a shorter prefix
|
||||||
|
# IMPORTANT: Use cidr (not network.to_s) to preserve the network mask
|
||||||
|
NetworkRange.where("?::inet << network", cidr)
|
||||||
|
.order("masklen(network) DESC") # Most specific parent first
|
||||||
end
|
end
|
||||||
|
|
||||||
def child_ranges
|
def child_ranges
|
||||||
NetworkRange.where("network >> ?::inet AND masklen(network) > ?", network.to_s, prefix_length)
|
# Find networks that are contained by this network (more specific / longer prefix)
|
||||||
.order("masklen(network) ASC")
|
# The >> operator implicitly means the contained network has a longer prefix
|
||||||
end
|
# IMPORTANT: Use cidr (not network.to_s) to preserve the network mask
|
||||||
|
NetworkRange.where("?::inet >> network", cidr)
|
||||||
def sibling_ranges
|
.order("masklen(network) ASC") # Least specific child first
|
||||||
NetworkRange.where("masklen(network) = ?", prefix_length)
|
|
||||||
.where("network && ?::inet", network.to_s)
|
|
||||||
.where.not(id: id)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Find nearest parent with intelligence data
|
# Find nearest parent with intelligence data
|
||||||
@@ -158,13 +162,26 @@ class NetworkRange < ApplicationRecord
|
|||||||
end
|
end
|
||||||
|
|
||||||
def mark_as_fetching_api_data!(source)
|
def mark_as_fetching_api_data!(source)
|
||||||
self.network_data ||= {}
|
# Use database-level locking to prevent race conditions
|
||||||
self.network_data['fetching_status'] ||= {}
|
transaction do
|
||||||
self.network_data['fetching_status'][source.to_s] = {
|
# Reload with lock to get fresh data
|
||||||
'started_at' => Time.current.to_f,
|
lock!
|
||||||
'job_id' => SecureRandom.hex(8)
|
|
||||||
}
|
# Double-check that we're not already fetching
|
||||||
save!
|
if is_fetching_api_data?(source)
|
||||||
|
Rails.logger.info "Another job already started fetching #{source} for #{cidr}"
|
||||||
|
return false
|
||||||
|
end
|
||||||
|
|
||||||
|
self.network_data ||= {}
|
||||||
|
self.network_data['fetching_status'] ||= {}
|
||||||
|
self.network_data['fetching_status'][source.to_s] = {
|
||||||
|
'started_at' => Time.current.to_f,
|
||||||
|
'job_id' => SecureRandom.hex(8)
|
||||||
|
}
|
||||||
|
save!
|
||||||
|
true
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def clear_fetching_status!(source)
|
def clear_fetching_status!(source)
|
||||||
@@ -222,9 +239,29 @@ class NetworkRange < ApplicationRecord
|
|||||||
end
|
end
|
||||||
|
|
||||||
def agent_tally
|
def agent_tally
|
||||||
# Rails.cache.fetch("#{to_s}:agent_tally", expires_in: 5.minutes) do
|
Rails.cache.fetch("#{cache_key}:agent_tally", expires_in: 5.minutes) do
|
||||||
events.map(&:user_agent).tally
|
# Use DuckDB for fast agent tally instead of loading all events into memory
|
||||||
# end
|
if persisted? && events_count > 0
|
||||||
|
# Include child network ranges to capture all traffic within this network block
|
||||||
|
network_ids = [id] + child_ranges.pluck(:id)
|
||||||
|
|
||||||
|
# Try DuckDB first for much faster aggregation
|
||||||
|
duckdb_tally = with_duckdb_fallback { EventDdb.network_agent_tally(network_ids) }
|
||||||
|
duckdb_tally || {}
|
||||||
|
else
|
||||||
|
# Virtual network - fallback to PostgreSQL CIDR query
|
||||||
|
events.map(&:user_agent).tally
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Helper method to try DuckDB first, fall back to PostgreSQL
|
||||||
|
# Runs the given block and returns its value unchanged. Any StandardError
# raised by the block is logged as a warning and converted to nil, so a caller
# can treat nil as "no DuckDB answer" and fall back to another data source.
#
# @yield the DuckDB-backed computation
# @return [Object, nil] the block's value, or nil when the block raised
def with_duckdb_fallback
  yield
rescue StandardError => e
  Rails.logger.warn "[NetworkRange] DuckDB query failed, falling back to PostgreSQL: #{e.message}"
  nil # Return nil to trigger fallback
end
|
||||||
|
|
||||||
# Geographic lookup
|
# Geographic lookup
|
||||||
@@ -334,6 +371,9 @@ class NetworkRange < ApplicationRecord
|
|||||||
def self.should_fetch_ipapi_for_ip?(ip_address)
|
def self.should_fetch_ipapi_for_ip?(ip_address)
|
||||||
tracking_network = find_or_create_tracking_network_for_ip(ip_address)
|
tracking_network = find_or_create_tracking_network_for_ip(ip_address)
|
||||||
|
|
||||||
|
# Check if currently being fetched (prevents duplicate jobs)
|
||||||
|
return false if tracking_network.is_fetching_api_data?(:ipapi)
|
||||||
|
|
||||||
# Check if /24 has been queried recently
|
# Check if /24 has been queried recently
|
||||||
queried_at = tracking_network.network_data&.dig('ipapi_queried_at')
|
queried_at = tracking_network.network_data&.dig('ipapi_queried_at')
|
||||||
return true if queried_at.nil?
|
return true if queried_at.nil?
|
||||||
|
|||||||
@@ -6,12 +6,11 @@
|
|||||||
# Network rules are associated with NetworkRange objects for rich context.
|
# Network rules are associated with NetworkRange objects for rich context.
|
||||||
class Rule < ApplicationRecord
|
class Rule < ApplicationRecord
|
||||||
# Rule enums (prefix needed to avoid rate_limit collision)
|
# Rule enums (prefix needed to avoid rate_limit collision)
|
||||||
enum :waf_action, { allow: 0, deny: 1, rate_limit: 2, redirect: 3, log: 4, challenge: 5 }, prefix: :action
|
# Canonical WAF action order - aligned with Agent and Event models
|
||||||
|
# Note: allow and log actions can include headers/tags in metadata for automatic injection
|
||||||
|
enum :waf_action, { deny: 0, allow: 1, redirect: 2, challenge: 3, log: 4 }, prefix: :action
|
||||||
enum :waf_rule_type, { network: 0, rate_limit: 1, path_pattern: 2 }, prefix: :type
|
enum :waf_rule_type, { network: 0, rate_limit: 1, path_pattern: 2 }, prefix: :type
|
||||||
|
|
||||||
# Legacy string constants for backward compatibility
|
|
||||||
RULE_TYPES = %w[network rate_limit path_pattern].freeze
|
|
||||||
ACTIONS = %w[allow deny rate_limit redirect log challenge].freeze
|
|
||||||
SOURCES = %w[manual auto:scanner_detected auto:rate_limit_exceeded auto:bot_detected imported default manual:surgical_block manual:surgical_exception policy].freeze
|
SOURCES = %w[manual auto:scanner_detected auto:rate_limit_exceeded auto:bot_detected imported default manual:surgical_block manual:surgical_exception policy].freeze
|
||||||
|
|
||||||
# Associations
|
# Associations
|
||||||
@@ -27,14 +26,6 @@ class Rule < ApplicationRecord
|
|||||||
validates :enabled, inclusion: { in: [true, false] }
|
validates :enabled, inclusion: { in: [true, false] }
|
||||||
validates :source, inclusion: { in: SOURCES }
|
validates :source, inclusion: { in: SOURCES }
|
||||||
|
|
||||||
# Legacy enum definitions (disabled to prevent conflicts)
|
|
||||||
# enum :action, { allow: "allow", deny: "deny", rate_limit: "rate_limit", redirect: "redirect", log: "log", challenge: "challenge" }, scopes: false
|
|
||||||
# enum :rule_type, { network: "network", rate_limit: "rate_limit", path_pattern: "path_pattern" }, scopes: false
|
|
||||||
|
|
||||||
# Legacy validations for backward compatibility during transition
|
|
||||||
# validates :rule_type, presence: true, inclusion: { in: RULE_TYPES }, allow_nil: true
|
|
||||||
# validates :action, presence: true, inclusion: { in: ACTIONS }, allow_nil: true
|
|
||||||
|
|
||||||
# Custom validations
|
# Custom validations
|
||||||
validate :validate_conditions_by_type
|
validate :validate_conditions_by_type
|
||||||
validate :validate_metadata_by_action
|
validate :validate_metadata_by_action
|
||||||
@@ -147,6 +138,42 @@ class Rule < ApplicationRecord
|
|||||||
metadata&.dig('challenge_message')
|
metadata&.dig('challenge_message')
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Tag-related methods
|
||||||
|
# Tags stored under the 'tags' key of metadata_hash.
#
# @return [Object] the stored value, or an empty array when the key is unset
def tags
  stored = metadata_hash['tags']
  stored || []
end
|
||||||
|
|
||||||
|
# Replaces the 'tags' entry in metadata, leaving the other keys of
# metadata_hash untouched.
#
# @param new_tags [Object] a single tag or a collection (coerced with Array())
def tags=(new_tags)
  replacement = { 'tags' => Array(new_tags) }
  self.metadata = metadata_hash.merge(replacement)
end
|
||||||
|
|
||||||
|
# Appends a tag (compared and stored as a string) unless it is already present.
# Only assigns metadata; nothing in this method saves the record.
#
# @param tag [#to_s] tag to add
# @return [nil, Object] nil when the tag already exists
def add_tag(tag)
  label = tag.to_s
  existing = tags
  return if existing.include?(label)

  self.metadata = metadata_hash.merge('tags' => existing + [label])
end
|
||||||
|
|
||||||
|
# Removes a tag (compared as a string) when it is present.
# Only assigns metadata; nothing in this method saves the record.
#
# @param tag [#to_s] tag to remove
# @return [nil, Object] nil when the tag was not present
def remove_tag(tag)
  label = tag.to_s
  existing = tags
  return unless existing.include?(label)

  self.metadata = metadata_hash.merge('tags' => existing - [label])
end
|
||||||
|
|
||||||
|
# Whether the given tag (compared as a string) is among this rule's tags.
#
# @param tag [#to_s] tag to look for
# @return [Boolean]
def has_tag?(tag)
  label = tag.to_s
  tags.include?(label)
end
|
||||||
|
|
||||||
|
# Headers for add_header action or metadata-based header injection
|
||||||
|
# Headers stored under the 'headers' key of metadata_hash.
#
# @return [Object] the stored value, or an empty hash when the key is unset
def headers
  stored = metadata_hash['headers']
  stored || {}
end
|
||||||
|
|
||||||
|
# Replaces the 'headers' entry in metadata, leaving the other keys of
# metadata_hash untouched.
#
# @param new_headers [#to_h] header name/value pairs
def headers=(new_headers)
  replacement = { 'headers' => new_headers.to_h }
  self.metadata = metadata_hash.merge(replacement)
end
|
||||||
|
|
||||||
def related_surgical_rules
|
def related_surgical_rules
|
||||||
if surgical_block?
|
if surgical_block?
|
||||||
# Find the corresponding exception rule
|
# Find the corresponding exception rule
|
||||||
@@ -356,12 +383,12 @@ class Rule < ApplicationRecord
|
|||||||
[block_rule, exception_rule]
|
[block_rule, exception_rule]
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.create_rate_limit_rule(cidr, limit:, window:, user: nil, **options)
|
def self.create_rate_limit_rule(cidr, limit:, window:, user: nil, action: 'deny', **options)
|
||||||
network_range = NetworkRange.find_or_create_by_cidr(cidr, user: user, source: 'user_created')
|
network_range = NetworkRange.find_or_create_by_cidr(cidr, user: user, source: 'user_created')
|
||||||
|
|
||||||
create!(
|
create!(
|
||||||
waf_rule_type: 'rate_limit',
|
waf_rule_type: 'rate_limit',
|
||||||
waf_action: 'rate_limit',
|
waf_action: action, # Action to take when rate limit exceeded (deny, redirect, challenge, log)
|
||||||
network_range: network_range,
|
network_range: network_range,
|
||||||
conditions: { cidr: cidr, scope: 'ip' },
|
conditions: { cidr: cidr, scope: 'ip' },
|
||||||
metadata: {
|
metadata: {
|
||||||
@@ -514,10 +541,6 @@ class Rule < ApplicationRecord
|
|||||||
if challenge_type_value && !%w[captcha javascript proof_of_work].include?(challenge_type_value)
|
if challenge_type_value && !%w[captcha javascript proof_of_work].include?(challenge_type_value)
|
||||||
errors.add(:metadata, "challenge_type must be one of: captcha, javascript, proof_of_work")
|
errors.add(:metadata, "challenge_type must be one of: captcha, javascript, proof_of_work")
|
||||||
end
|
end
|
||||||
when "rate_limit"
|
|
||||||
unless metadata&.dig("limit").present? && metadata&.dig("window").present?
|
|
||||||
errors.add(:metadata, "must include 'limit' and 'window' for rate_limit action")
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -15,4 +15,9 @@ class Setting < ApplicationRecord
|
|||||||
def self.ipapi_key
|
def self.ipapi_key
|
||||||
get('ipapi_key', ENV['IPAPI_KEY'])
|
get('ipapi_key', ENV['IPAPI_KEY'])
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Convenience method for event retention days (default: 90 days)
|
||||||
|
# Event retention period in days, read from the 'event_retention_days'
# setting with '90' as the fallback and converted with to_i.
#
# @return [Integer]
def self.event_retention_days
  raw_days = get('event_retention_days', '90')
  raw_days.to_i
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ class WafPolicy < ApplicationRecord
|
|||||||
POLICY_TYPES = %w[country asn company network_type path_pattern].freeze
|
POLICY_TYPES = %w[country asn company network_type path_pattern].freeze
|
||||||
|
|
||||||
# Actions - what to do when traffic matches this policy
|
# Actions - what to do when traffic matches this policy
|
||||||
ACTIONS = %w[allow deny redirect challenge].freeze
|
ACTIONS = %w[allow deny redirect challenge log].freeze
|
||||||
|
|
||||||
# Associations
|
# Associations
|
||||||
belongs_to :user
|
belongs_to :user
|
||||||
|
|||||||
293
app/services/analytics_duckdb_service.rb
Normal file
293
app/services/analytics_duckdb_service.rb
Normal file
@@ -0,0 +1,293 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# Service for managing DuckDB analytics database
|
||||||
|
# Provides fast analytical queries on events data using columnar storage
|
||||||
|
# Singleton facade over the DuckDB analytics database stored at DUCKDB_PATH.
#
# Every public query method opens the database, runs, and closes it again via
# #with_connection; no connection is kept between calls.
class AnalyticsDuckdbService
  include Singleton

  DUCKDB_PATH = Rails.root.join("storage", "analytics.duckdb").to_s
  BATCH_SIZE = 10_000

  # Opens the database and a connection, yields the connection, and closes
  # both afterwards, even when the block raises.
  #
  # @yieldparam conn the open DuckDB connection
  # @return [Object] whatever the block returns
  def with_connection
    db = DuckDB::Database.open(DUCKDB_PATH)
    conn = db.connect
    yield conn
  ensure
    conn&.close
    db&.close
  end

  # Creates the events table if it doesn't exist.
  # Must be called with a connection obtained from #with_connection.
  def setup_schema(conn)
    conn.execute(<<~SQL)
      CREATE TABLE IF NOT EXISTS events (
        id BIGINT PRIMARY KEY,
        timestamp TIMESTAMP NOT NULL,
        ip_address VARCHAR,
        network_range_id BIGINT,
        country VARCHAR,
        company VARCHAR,
        asn INTEGER,
        asn_org VARCHAR,
        is_datacenter BOOLEAN,
        is_vpn BOOLEAN,
        is_proxy BOOLEAN,
        is_bot BOOLEAN,
        waf_action INTEGER,
        request_path VARCHAR,
        user_agent VARCHAR,
        tags VARCHAR[]
      )
    SQL

    Rails.logger.info "[DuckDB] Schema setup complete"
  end

  # Timestamp of the oldest event in DuckDB.
  #
  # @return [Object, nil] nil when the table is empty or the query raises (logged)
  def oldest_event_timestamp
    with_connection do |conn|
      first_value(conn, "SELECT MIN(timestamp) as oldest FROM events")
    end
  rescue StandardError => e
    Rails.logger.error "[DuckDB] Error getting oldest timestamp: #{e.message}"
    nil
  end

  # Timestamp of the newest event in DuckDB.
  #
  # @return [Object, nil] nil when the table is empty or the query raises (logged)
  def newest_event_timestamp
    with_connection do |conn|
      first_value(conn, "SELECT MAX(timestamp) as newest FROM events")
    end
  rescue StandardError => e
    Rails.logger.error "[DuckDB] Error getting newest timestamp: #{e.message}"
    nil
  end

  # Highest event id already present in DuckDB.
  #
  # @return [Integer] 0 when the table is empty or the query raises (logged)
  def max_synced_id
    with_connection { |conn| fetch_max_id(conn) }
  rescue StandardError => e
    Rails.logger.error "[DuckDB] Error getting max ID: #{e.message}"
    0
  end

  # Copies events from PostgreSQL into DuckDB.
  #
  # Only rows with timestamp >= from_timestamp AND id greater than the highest
  # id already in DuckDB are read, in id order, through each_row, and written
  # with a DuckDB appender that is closed and recreated every BATCH_SIZE rows.
  #
  # @param from_timestamp [Object] lower timestamp bound for the PostgreSQL query
  # @return [Integer] number of rows appended; 0 when anything raised (the
  #   error is logged), even if earlier batches had already been closed
  def sync_new_events(from_timestamp)
    total_synced = 0

    with_connection do |conn|
      # Ensure table exists
      setup_schema(conn)

      # Highest id already stored, used to avoid inserting duplicates
      max_id = fetch_max_id(conn)
      Rails.logger.info "[DuckDB] Syncing events from #{from_timestamp}, max_id=#{max_id}"

      start_time = Time.current
      appender = nil

      begin
        appender = conn.appender("events")

        Event.where("timestamp >= ? AND id > ?", from_timestamp, max_id)
             .select(
               :id,
               :timestamp,
               :ip_address,
               :network_range_id,
               :country,
               :company,
               :asn,
               :asn_org,
               :is_datacenter,
               :is_vpn,
               :is_proxy,
               :is_bot,
               :waf_action,
               :request_path,
               :user_agent,
               :tags
             )
             .order(:id)
             .each_row(block_size: BATCH_SIZE) do |event_data|
          append_event(appender, event_data)
          total_synced += 1

          # Close and recreate the appender every BATCH_SIZE events to avoid chunk overflow
          next unless (total_synced % BATCH_SIZE).zero?

          appender.close
          appender = conn.appender("events")
          Rails.logger.info "[DuckDB] Synced batch (total: #{total_synced} events)"
        end

        # Close final appender
        appender&.close

        duration = Time.current - start_time
        rate = total_synced / duration if duration > 0
        Rails.logger.info "[DuckDB] Sync complete: #{total_synced} events in #{duration.round(2)}s (~#{rate&.round(0)} events/sec)"
      rescue StandardError => e
        close_quietly(appender) # Ensure appender is closed on error
        Rails.logger.error "[DuckDB] Error syncing events: #{e.message}"
        Rails.logger.error e.backtrace.join("\n")
        raise # Re-raise to be caught by outer rescue
      end
    end

    total_synced
  rescue StandardError => e
    Rails.logger.error "[DuckDB] Sync failed: #{e.message}"
    0
  end

  # Executes an arbitrary query with bound parameters. Errors are logged
  # together with the SQL and then re-raised.
  #
  # NOTE(review): the result object is handed back after #with_connection has
  # already closed the connection and database - confirm it is still safe to
  # read at that point, or materialize it inside the block.
  def query(sql, *params)
    with_connection do |conn|
      conn.query(sql, *params)
    end
  rescue StandardError => e
    Rails.logger.error "[DuckDB] Query error: #{e.message}"
    Rails.logger.error "SQL: #{sql}"
    raise
  end

  # Number of events stored in DuckDB.
  #
  # @return [Integer] 0 when the query raises (logged)
  def event_count
    with_connection do |conn|
      first_value(conn, "SELECT COUNT(*) as count FROM events") || 0
    end
  rescue StandardError => e
    Rails.logger.error "[DuckDB] Error getting event count: #{e.message}"
    0
  end

  # Analytics query: total events at or after start_time.
  # Unlike the methods above, errors are not rescued here.
  def total_events_since(start_time)
    with_connection do |conn|
      first_value(conn, "SELECT COUNT(*) as count FROM events WHERE timestamp >= ?", start_time) || 0
    end
  end

  # Analytics query: event breakdown by WAF action.
  #
  # @return [Hash] waf_action => count
  def event_breakdown_by_action(start_time)
    with_connection do |conn|
      result = conn.query(<<~SQL, start_time)
        SELECT waf_action, COUNT(*) as count
        FROM events
        WHERE timestamp >= ?
        GROUP BY waf_action
      SQL

      # Rows are indexed positionally: [waf_action, count]
      result.to_a.to_h { |row| [row[0], row[1]] }
    end
  end

  # Analytics query: top countries by event count.
  #
  # @return [Array<Array>] [country, count] pairs, largest count first
  def top_countries(start_time, limit = 10)
    with_connection do |conn|
      result = conn.query(<<~SQL, start_time, limit)
        SELECT country, COUNT(*) as count
        FROM events
        WHERE timestamp >= ? AND country IS NOT NULL
        GROUP BY country
        ORDER BY count DESC
        LIMIT ?
      SQL

      # Rows are indexed positionally: [country, count]
      result.to_a.map { |row| [row[0], row[1]] }
    end
  end

  # Analytics query: IPs with the most events whose waf_action is 0.
  #
  # @return [Array<Array>] [ip_address, count] pairs, largest count first
  def top_blocked_ips(start_time, limit = 10)
    with_connection do |conn|
      result = conn.query(<<~SQL, start_time, limit)
        SELECT ip_address, COUNT(*) as count
        FROM events
        WHERE timestamp >= ? AND waf_action = 0
        GROUP BY ip_address
        ORDER BY count DESC
        LIMIT ?
      SQL

      # Rows are indexed positionally: [ip_address, count]
      result.to_a.map { |row| [row[0], row[1]] }
    end
  end

  # Analytics query: events per hour for start_time <= timestamp < end_time.
  #
  # @return [Hash] truncated hour => count
  def hourly_timeline(start_time, end_time)
    with_connection do |conn|
      result = conn.query(<<~SQL, start_time, end_time)
        SELECT
          DATE_TRUNC('hour', timestamp) as hour,
          COUNT(*) as count
        FROM events
        WHERE timestamp >= ? AND timestamp < ?
        GROUP BY hour
        ORDER BY hour
      SQL

      # Rows are indexed positionally: [hour, count]
      result.to_a.to_h { |row| [row[0], row[1]] }
    end
  end

  # Close DuckDB connection (for cleanup/testing).
  #
  # NOTE(review): nothing in this class assigns @connection, so as written this
  # only resets the instance variable to nil - confirm whether it is still needed.
  def close
    @connection&.close
    @connection = nil
  end

  private

  # First column of the first row of a query, or nil when there is no row.
  def first_value(conn, sql, *params)
    conn.query(sql, *params).first&.first
  end

  # Highest event id in the DuckDB events table (0 when the table is empty).
  def fetch_max_id(conn)
    first_value(conn, "SELECT COALESCE(MAX(id), 0) as max_id FROM events") || 0
  end

  # Appends one each_row hash to the appender in table-column order.
  # Failures are logged with the offending row and re-raised.
  def append_event(appender, event_data)
    appender.append_row(
      event_data["id"],
      event_data["timestamp"],
      event_data["ip_address"]&.to_s,
      event_data["network_range_id"],
      event_data["country"],
      event_data["company"],
      event_data["asn"],
      event_data["asn_org"],
      event_data["is_datacenter"],
      event_data["is_vpn"],
      event_data["is_proxy"],
      event_data["is_bot"],
      event_data["waf_action"],
      event_data["request_path"],
      event_data["user_agent"],
      event_data["tags"] || []
    )
  rescue StandardError => e
    Rails.logger.error "[DuckDB] Error appending event #{event_data['id']}: #{e.message}"
    Rails.logger.error "[DuckDB] event_data = #{event_data.inspect}"
    raise
  end

  # Best-effort close used on the error path, so that a failure while closing
  # cannot mask the error that is about to be re-raised.
  def close_quietly(appender)
    appender&.close
  rescue StandardError
    nil
  end
end
|
||||||
579
app/services/bot_network_range_importer.rb
Normal file
579
app/services/bot_network_range_importer.rb
Normal file
@@ -0,0 +1,579 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# BotNetworkRangeImporter - Service for importing official bot network ranges
|
||||||
|
#
|
||||||
|
# Imports network ranges from official bot provider sources like:
|
||||||
|
# - Amazon AWS: https://ip-ranges.amazonaws.com/ip-ranges.json
|
||||||
|
# - Google: Official crawler IP lists
|
||||||
|
# - Microsoft/Bing: Bot network ranges
|
||||||
|
# - Anthropic: Service network ranges
|
||||||
|
# - OpenAI: Service network ranges
|
||||||
|
class BotNetworkRangeImporter
|
||||||
|
class ImportError < StandardError; end
|
||||||
|
|
||||||
|
# Official sources for bot network ranges, keyed by source identifier.
#
# Each entry carries a display :name, the feed location (:url or :urls),
# the payload :format, the class-level :parser that import_from_source
# dispatches to, and a human-readable :description.
#
# The table is frozen at every level (outer hash, each definition, each
# value) so a source definition cannot be altered at runtime.
BOT_SOURCES = {
  amazon_aws: {
    name: 'Amazon AWS',
    url: 'https://ip-ranges.amazonaws.com/ip-ranges.json',
    format: :json,
    parser: :parse_aws_ranges,
    description: 'Official AWS IP ranges including Amazonbot and other services'
  },
  google: {
    name: 'Google',
    # Note: These URLs may need to be updated based on current Google documentation
    urls: [
      'https://developers.google.com/search/docs/files/googlebot.json',
      'https://developers.google.com/search/docs/files/special-crawlers.json'
    ],
    format: :json,
    parser: :parse_google_ranges,
    description: 'Googlebot and other Google crawler IP ranges'
  },
  microsoft_bing: {
    name: 'Microsoft Bing',
    # Note: Microsoft may require web scraping or API access
    url: 'https://www.bing.com/toolbox/bingbot.json',
    format: :json,
    parser: :parse_microsoft_ranges,
    description: 'Bingbot and other Microsoft crawler IP ranges'
  },
  anthropic: {
    name: 'Anthropic Claude',
    # Note: Anthropic ranges may need manual updates or different approach
    url: 'https://docs.anthropic.com/claude/reference/ip_ranges',
    format: :html,
    parser: :parse_anthropic_ranges,
    description: 'Anthropic Claude API service IP ranges'
  },
  openai_searchbot: {
    name: 'OpenAI SearchBot',
    url: 'https://openai.com/searchbot.json',
    format: :json,
    parser: :parse_openai_ranges,
    description: 'OpenAI SearchBot for ChatGPT search features'
  },
  openai_chatgpt_user: {
    name: 'OpenAI ChatGPT-User',
    url: 'https://openai.com/chatgpt-user.json',
    format: :json,
    parser: :parse_openai_ranges,
    description: 'OpenAI ChatGPT-User for user actions in ChatGPT and Custom GPTs'
  },
  openai_gptbot: {
    name: 'OpenAI GPTBot',
    url: 'https://openai.com/gptbot.json',
    format: :json,
    parser: :parse_openai_ranges,
    description: 'OpenAI GPTBot for training AI foundation models'
  },
  cloudflare: {
    name: 'Cloudflare',
    urls: [
      'https://www.cloudflare.com/ips-v4',
      'https://www.cloudflare.com/ips-v6'
    ],
    format: :text,
    parser: :parse_cloudflare_ranges,
    description: 'Cloudflare network ranges including their crawlers and services'
  },
  facebook: {
    name: 'Facebook/Meta',
    url: 'https://developers.facebook.com/docs/sharing/webmasters/crawler/',
    format: :html,
    parser: :parse_facebook_ranges,
    description: 'Facebook/Meta crawlers and bots'
  },
  applebot: {
    name: 'Applebot',
    url: 'https://support.apple.com/en-us/HT204683',
    format: :html,
    parser: :parse_applebot_ranges,
    description: 'Applebot crawler for Apple search and Siri'
  },
  duckduckgo: {
    name: 'DuckDuckBot',
    url: 'https://help.duckduckgo.com/duckduckgo-help-pages/results/duckduckbot/',
    format: :html,
    parser: :parse_duckduckgo_ranges,
    description: 'DuckDuckGo search crawler'
  }
}.each_value { |definition| definition.each_value(&:freeze).freeze }.freeze
|
||||||
|
|
||||||
|
# Import the network ranges published by one configured source.
#
# source_key - key of the entry in BOT_SOURCES (String or Symbol).
# options    - Hash handed unchanged to the source's parser.
#
# Returns whatever the selected parser returns.
# Raises ImportError when the key is not in BOT_SOURCES or the entry names
# a parser outside the known list.
def self.import_from_source(source_key, options = {})
  source = BOT_SOURCES[source_key.to_sym]
  raise ImportError, "Unknown source: #{source_key}" unless source

  puts "Importing bot network ranges from #{source[:name]}..."

  # Only these class methods may be reached through the :parser entry.
  known_parsers = %i[
    parse_aws_ranges
    parse_google_ranges
    parse_microsoft_ranges
    parse_anthropic_ranges
    parse_openai_ranges
    parse_cloudflare_ranges
    parse_facebook_ranges
    parse_applebot_ranges
    parse_duckduckgo_ranges
  ]

  parser = source[:parser]
  raise ImportError, "Unknown parser: #{parser}" unless known_parsers.include?(parser)

  send(parser, source, options)
end
|
||||||
|
|
||||||
|
# Run import_from_source for every entry of BOT_SOURCES, in table order.
#
# A source that raises does not stop the run: the error is logged and its
# result becomes { error: <message>, imported: 0 }. Progress banners and a
# final per-source summary are written to stdout.
#
# Returns a Hash of source key => parser result (or error hash).
def self.import_all_sources(options = {})
  divider = "=" * 50

  results = BOT_SOURCES.each_with_object({}) do |(source_key, source), collected|
    puts "\n#{divider}", "Processing #{source[:name]}...", divider

    collected[source_key] =
      begin
        import_from_source(source_key, options)
      rescue => e
        Rails.logger.error "Failed to import from #{source[:name]}: #{e.message}"
        { error: e.message, imported: 0 }
      end
  end

  puts "\n#{divider}", "Import Summary", divider

  results.each do |source_key, result|
    outcome =
      if result[:error]
        "FAILED - #{result[:error]}"
      else
        "SUCCESS - #{result[:imported]} ranges imported"
      end
    puts "#{source_key}: #{outcome}"
  end

  results
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
# Amazon AWS IP ranges parser.
#
# Downloads the JSON document at source[:url] and imports every entry of
# its 'prefixes' list whose 'service' is in options[:aws_services]
# (default: AMAZON, ROUTE53, EC2, CLOUDFRONT). Only the 'prefixes' list and
# its 'ip_prefix' field are read. Records are handed to import_batch in
# groups of options[:batch_size] (default 1000).
#
# Returns { imported: Integer, source: 'Amazon AWS' }.
# Raises ImportError on a non-200 response, a timeout, or unparsable JSON.
def self.parse_aws_ranges(source, options = {})
  require 'net/http'
  require 'uri'

  uri = URI.parse(source[:url])
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true
  http.read_timeout = 30
  # This feed decides which networks get labelled as Amazon, so the server
  # certificate must be checked; VERIFY_NONE would let an on-path attacker
  # supply arbitrary ranges.
  http.verify_mode = OpenSSL::SSL::VERIFY_PEER

  response = http.get(uri.request_uri)
  raise ImportError, "Failed to fetch AWS IP ranges: #{response.code}" unless response.code == '200'

  data = JSON.parse(response.body)
  imported_count = 0
  batch_size = options[:batch_size] || 1000
  batch = []

  # Filter for relevant services (can be customized)
  relevant_services = options[:aws_services] || ['AMAZON', 'ROUTE53', 'EC2', 'CLOUDFRONT']

  data['prefixes'].each do |prefix|
    next unless relevant_services.include?(prefix['service'])

    batch << {
      network: prefix['ip_prefix'],
      source: 'bot_import_amazon_aws',
      asn: nil, # AWS doesn't provide ASN in this feed
      asn_org: 'Amazon Web Services',
      company: 'Amazon',
      country: nil,
      is_datacenter: true,
      is_proxy: false,
      is_vpn: false,
      additional_data: {
        aws_service: prefix['service'],
        aws_region: prefix['region'],
        aws_network_border_group: prefix['network_border_group'],
        import_date: Time.current.iso8601
      }.to_json
    }

    next if batch.size < batch_size

    # Flush a full batch and start collecting the next one.
    imported_count += import_batch(batch, 'Amazon AWS')
    batch = []
    puts "Imported #{imported_count} AWS ranges..."
  end

  # Import remaining records
  imported_count += import_batch(batch, 'Amazon AWS') if batch.any?

  puts "Amazon AWS import completed: #{imported_count} ranges imported"
  { imported: imported_count, source: 'Amazon AWS' }
rescue Timeout::Error, Net::OpenTimeout => e
  raise ImportError, "Network timeout while fetching AWS ranges: #{e.message}"
rescue JSON::ParserError => e
  raise ImportError, "Failed to parse AWS JSON response: #{e.message}"
end
|
||||||
|
|
||||||
|
# Google crawler IP ranges parser.
#
# Fetches every URL in source[:urls] (or the single source[:url]) and
# imports the networks listed there. Two payload shapes are understood:
#   * an object with a 'prefixes' list whose entries carry 'ipv4Prefix' or
#     'ipv6Prefix' (the layout of Google's published crawler lists), and
#   * a bare array whose entries carry 'cidr' or 'prefix'.
# All URLs are processed and their counts added up; a URL that fails is
# logged and skipped. Records go to import_batch in groups of
# options[:batch_size] (default 1000).
#
# Returns { imported: Integer, source: 'Google' }.
# Raises ImportError when not a single URL could be fetched and parsed.
def self.parse_google_ranges(source, options = {})
  require 'net/http'
  require 'uri'

  batch_size = options[:batch_size] || 1000
  imported_count = 0
  fetched_any = false

  Array(source[:urls] || source[:url]).each do |url|
    begin
      puts "Attempting to fetch Google ranges from: #{url}"

      uri = URI.parse(url)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true
      http.read_timeout = 30

      response = http.get(uri.request_uri)
      next unless response.code == '200'

      data = JSON.parse(response.body)
      entries = data.is_a?(Hash) ? Array(data['prefixes']) : Array(data)
      fetched_any = true

      batch = []
      entries.each do |entry|
        next unless entry.is_a?(Hash)

        network = entry['ipv4Prefix'] || entry['ipv6Prefix'] || entry['cidr'] || entry['prefix']
        next unless network

        batch << {
          network: network,
          source: 'bot_import_google',
          asn: nil,
          asn_org: 'Google LLC',
          company: 'Google',
          country: nil,
          is_datacenter: true,
          is_proxy: false,
          is_vpn: false,
          additional_data: {
            crawler_type: entry['crawler_type'] || 'unknown',
            user_agent: entry['user_agent'],
            import_date: Time.current.iso8601,
            source_url: url
          }.to_json
        }

        next if batch.size < batch_size

        imported_count += import_batch(batch, 'Google')
        batch = []
        puts "Imported #{imported_count} Google ranges..."
      end

      # Import remaining records of this URL
      imported_count += import_batch(batch, 'Google') if batch.any?
    rescue => e
      Rails.logger.warn "Failed to fetch from #{url}: #{e.message}"
      next
    end
  end

  raise ImportError, "Failed to fetch Google crawler ranges from any URL" unless fetched_any

  puts "Google import completed: #{imported_count} ranges imported"
  { imported: imported_count, source: 'Google' }
end
|
||||||
|
|
||||||
|
# Microsoft Bing crawler IP ranges parser (placeholder).
#
# Imports nothing: prints a pointer to the manual procedure and reports zero
# imported ranges. Neither argument is read.
def self.parse_microsoft_ranges(source, options = {})
  puts(
    "Microsoft Bing crawler import requires manual configuration or web scraping",
    "Refer to: https://www.bing.com/webmaster/help/which-crawlers-does-bing-use"
  )

  summary = { imported: 0, source: 'Microsoft Bing' }
  summary[:note] = 'Manual configuration required - Microsoft does not provide direct IP range feeds'
  summary
end
|
||||||
|
|
||||||
|
# Anthropic service IP ranges parser (placeholder).
#
# Imports nothing: prints a pointer to the manual procedure and reports zero
# imported ranges. Neither argument is read.
def self.parse_anthropic_ranges(source, options = {})
  puts(
    "Anthropic Claude service ranges require manual configuration",
    "Refer to: https://docs.anthropic.com/claude/reference/ip_ranges"
  )

  summary = { imported: 0, source: 'Anthropic' }
  summary[:note] = 'Manual configuration required - Anthropic does not provide automated IP range feeds'
  summary
end
|
||||||
|
|
||||||
|
# OpenAI crawler IP ranges parser.
#
# Downloads the JSON document at source[:url] and imports its networks.
# The crawler type is source[:name] with the 'OpenAI ' prefix removed and
# lower-cased; it is embedded in the stored :source tag and metadata.
#
# Accepted payloads: an object with a 'prefixes' list, or a bare list.
# Each entry may name its network under 'ipv4Prefix', 'ipv6Prefix', 'cidr',
# 'ip_prefix' or 'ip'. A value without '/' is treated as a single address
# and becomes a host route: /128 when it contains ':', /32 otherwise.
# Records go to import_batch in groups of options[:batch_size] (default 1000).
#
# Returns { imported: Integer, source: "OpenAI <crawler type>" }.
# Raises ImportError on a non-200 response, a timeout, or unparsable JSON.
def self.parse_openai_ranges(source, options = {})
  require 'net/http'
  require 'uri'

  # Determined before any network activity so the rescue clauses at the
  # bottom can always name the crawler.
  crawler_type = source[:name].gsub('OpenAI ', '').downcase

  uri = URI.parse(source[:url])
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true
  http.read_timeout = 30
  # The feed decides which networks get labelled as OpenAI crawlers, so the
  # server certificate must be checked rather than skipped.
  http.verify_mode = OpenSSL::SSL::VERIFY_PEER

  response = http.get(uri.request_uri)
  raise ImportError, "Failed to fetch OpenAI IP ranges: #{response.code}" unless response.code == '200'

  data = JSON.parse(response.body)
  imported_count = 0
  batch_size = options[:batch_size] || 1000
  batch = []

  # Handle different OpenAI JSON formats (indexing an Array with a String
  # key raises, so the shape is checked first).
  prefixes = data.is_a?(Hash) ? Array(data['prefixes']) : Array(data)

  prefixes.each do |entry|
    next unless entry.is_a?(Hash)

    ip_range = entry['ipv4Prefix'] || entry['ipv6Prefix'] || entry['cidr'] || entry['ip_prefix'] || entry['ip']
    next unless ip_range

    # Convert single IPs to /32 (IPv4) or /128 (IPv6)
    network =
      if ip_range.include?('/')
        ip_range
      else
        "#{ip_range}/#{ip_range.include?(':') ? 128 : 32}"
      end

    batch << {
      network: network,
      source: "bot_import_openai_#{crawler_type}",
      asn: nil,
      asn_org: 'OpenAI',
      company: 'OpenAI',
      country: nil,
      is_datacenter: true,
      is_proxy: false,
      is_vpn: false,
      additional_data: {
        crawler_type: crawler_type,
        crawler_purpose: crawler_purpose(crawler_type),
        user_agent: openai_user_agent(crawler_type),
        import_date: Time.current.iso8601,
        source_url: source[:url]
      }.to_json
    }

    next if batch.size < batch_size

    imported_count += import_batch(batch, "OpenAI #{crawler_type}")
    batch = []
    puts "Imported #{imported_count} OpenAI #{crawler_type} ranges..."
  end

  # Import remaining records
  imported_count += import_batch(batch, "OpenAI #{crawler_type}") if batch.any?

  puts "OpenAI #{crawler_type} import completed: #{imported_count} ranges imported"
  { imported: imported_count, source: "OpenAI #{crawler_type}" }
rescue Timeout::Error, Net::OpenTimeout => e
  raise ImportError, "Network timeout while fetching OpenAI #{crawler_type} ranges: #{e.message}"
rescue JSON::ParserError => e
  raise ImportError, "Failed to parse OpenAI #{crawler_type} JSON response: #{e.message}"
end
|
||||||
|
|
||||||
|
# Persist one batch of network-range attribute hashes.
#
# batch_data  - Array of Hashes, each with at least a :network key.
# source_name - label used in progress and log messages.
#
# Networks already present in NetworkRange are skipped; the rest are
# written with a single insert_all. If anything raises, the error is logged
# and the ranges identified as new so far are retried one at a time with
# create!, logging each individual failure. When the failure happened before
# the new ranges could be determined, nothing is retried.
#
# Returns the number of ranges written.
#
# NOTE(review): duplicate detection compares the plucked :network values
# with the batch's strings via Array#include? (==); confirm that the column
# type returned by pluck compares equal to the CIDR strings.
def self.import_batch(batch_data, source_name)
  # Initialised up front so the rescue handler never sees nil when the
  # duplicate lookup itself is what failed.
  new_ranges = []

  # Check for existing ranges to avoid duplicates
  existing_networks = NetworkRange.where(network: batch_data.map { |d| d[:network] }).pluck(:network)
  new_ranges = batch_data.reject { |d| existing_networks.include?(d[:network]) }

  if new_ranges.any?
    NetworkRange.insert_all(new_ranges)
    puts "Imported #{new_ranges.size} new #{source_name} ranges (#{batch_data.size - new_ranges.size} duplicates skipped)"
  else
    puts "No new #{source_name} ranges to import (all duplicates)"
  end

  new_ranges.size
rescue => e
  Rails.logger.error "Failed to import #{source_name} batch: #{e.message}"

  # Fallback to individual imports
  imported = 0
  new_ranges.each do |data|
    begin
      NetworkRange.create!(data)
      imported += 1
    rescue => individual_error
      Rails.logger.error "Failed to import individual #{source_name} record: #{individual_error.message}"
    end
  end

  imported
end
|
||||||
|
|
||||||
|
# Describe what an OpenAI crawler is used for.
#
# crawler_type - 'searchbot', 'chatgpt-user' or 'gptbot'.
#
# Returns the matching description, or 'Unknown purpose' for any other value.
def self.crawler_purpose(crawler_type)
  purposes = {
    'searchbot' => 'Used to link to and surface websites in search results in ChatGPT\'s search features',
    'chatgpt-user' => 'User actions in ChatGPT and Custom GPTs, including GPT Actions',
    'gptbot' => 'Used to crawl content for training OpenAI\'s generative AI foundation models'
  }

  purposes.fetch(crawler_type, 'Unknown purpose')
end
|
||||||
|
|
||||||
|
# Look up the user agent string recorded for an OpenAI crawler.
#
# crawler_type - 'searchbot', 'chatgpt-user' or 'gptbot'.
#
# Returns the matching string, or 'Unknown user agent' for any other value.
def self.openai_user_agent(crawler_type)
  user_agents = {
    'searchbot' => 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot',
    'chatgpt-user' => 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot',
    'gptbot' => 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; GPTBot/1.1; +https://openai.com/gptbot'
  }

  user_agents.fetch(crawler_type, 'Unknown user agent')
end
|
||||||
|
|
||||||
|
# Cloudflare IP ranges parser.
#
# Fetches every URL in source[:urls] (or the single source[:url]); each
# response is plain text holding one CIDR per line, or whitespace-separated
# CIDRs on a single line. Blank lines, lines starting with '#', and tokens
# that do not look like "<address>/<digits>" are ignored. The IP version
# stored in the metadata is taken from the address itself (':' => 6).
# Records go to import_batch in groups of options[:batch_size] (default 1000).
#
# A URL that fails is logged and skipped so the other list can still be
# imported.
#
# Returns { imported: Integer, source: 'Cloudflare' }.
# Raises ImportError when not a single URL could be fetched.
def self.parse_cloudflare_ranges(source, options = {})
  require 'net/http'
  require 'uri'

  imported_count = 0
  urls = Array(source[:urls] || source[:url])
  batch_size = options[:batch_size] || 1000
  batch = []
  fetched_any = false

  urls.each do |url|
    begin
      puts "Fetching Cloudflare ranges from: #{url}"

      uri = URI.parse(url)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true
      http.read_timeout = 30

      response = http.get(uri.request_uri)
      raise ImportError, "Failed to fetch Cloudflare ranges: #{response.code}" unless response.code == '200'

      fetched_any = true

      # Cloudflare provides plain text CIDR lists
      # Handle both newline-separated and single-line formats
      lines = response.body.include?("\n") ? response.body.split("\n") : response.body.split

      lines.each do |line|
        line = line.strip
        next if line.empty? || line.start_with?('#')

        # Validate CIDR format
        next unless line.match?(/\A[0-9a-fA-F:.]+\/\d+\z/)

        batch << {
          network: line,
          source: 'bot_import_cloudflare',
          asn: nil,
          asn_org: 'Cloudflare',
          company: 'Cloudflare',
          country: nil,
          is_datacenter: true,
          is_proxy: false,
          is_vpn: false,
          additional_data: {
            ip_version: line.include?(':') ? 6 : 4,
            import_date: Time.current.iso8601,
            source_url: url,
            service_type: 'cdn_and_security'
          }.to_json
        }

        next if batch.size < batch_size

        imported_count += import_batch(batch, 'Cloudflare')
        batch = []
        puts "Imported #{imported_count} Cloudflare ranges..."
      end
    rescue => e
      Rails.logger.warn "Failed to fetch Cloudflare ranges from #{url}: #{e.message}"
      next
    end
  end

  # Reporting "0 imported" as a success would hide a total outage.
  raise ImportError, "Failed to fetch Cloudflare ranges from any URL" unless fetched_any

  # Import remaining records
  imported_count += import_batch(batch, 'Cloudflare') if batch.any?

  puts "Cloudflare import completed: #{imported_count} ranges imported"
  { imported: imported_count, source: 'Cloudflare' }
end
|
||||||
|
|
||||||
|
# Facebook/Meta crawler ranges parser (placeholder).
#
# Imports nothing: prints a pointer to the manual procedure and reports zero
# imported ranges. Neither argument is read.
def self.parse_facebook_ranges(source, options = {})
  puts(
    "Facebook/Meta crawler ranges require web scraping or manual configuration",
    "Refer to: https://developers.facebook.com/docs/sharing/webmasters/crawler/"
  )

  summary = { imported: 0, source: 'Facebook/Meta' }
  summary[:note] = 'Manual configuration required - Facebook does not provide automated IP range feeds'
  summary
end
|
||||||
|
|
||||||
|
# Applebot crawler ranges parser (placeholder).
#
# Imports nothing: prints a pointer to the manual procedure and reports zero
# imported ranges. Neither argument is read.
def self.parse_applebot_ranges(source, options = {})
  puts(
    "Applebot ranges require web scraping or manual configuration",
    "Refer to: https://support.apple.com/en-us/HT204683"
  )

  summary = { imported: 0, source: 'Applebot' }
  summary[:note] = 'Manual configuration required - Apple does not provide automated IP range feeds'
  summary
end
|
||||||
|
|
||||||
|
# DuckDuckBot crawler ranges parser (placeholder).
#
# Imports nothing: prints a pointer to the manual procedure and reports zero
# imported ranges. Neither argument is read.
def self.parse_duckduckgo_ranges(source, options = {})
  puts(
    "DuckDuckBot ranges require web scraping or manual configuration",
    "Refer to: https://help.duckduckgo.com/duckduckgo-help-pages/results/duckduckbot/"
  )

  summary = { imported: 0, source: 'DuckDuckBot' }
  summary[:note] = 'Manual configuration required - DuckDuckGo does not provide automated IP range feeds'
  summary
end
|
||||||
|
end
|
||||||
@@ -41,10 +41,11 @@ class EventNormalizer
|
|||||||
return unless raw_action.present?
|
return unless raw_action.present?
|
||||||
|
|
||||||
action_enum = case raw_action.to_s.downcase
|
action_enum = case raw_action.to_s.downcase
|
||||||
when 'allow', 'pass' then :allow
|
|
||||||
when 'deny', 'block' then :deny
|
when 'deny', 'block' then :deny
|
||||||
when 'challenge' then :challenge
|
when 'allow', 'pass' then :allow
|
||||||
when 'redirect' then :redirect
|
when 'redirect' then :redirect
|
||||||
|
when 'challenge' then :challenge
|
||||||
|
when 'log', 'monitor' then :log
|
||||||
else :allow
|
else :allow
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -53,4 +53,107 @@ class Ipapi
|
|||||||
next
|
next
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Parse the company/datacenter network range out of IPAPI data.
#
# Reads company.network, falling back to datacenter.network. Two notations
# are understood:
#   * "START - END" (IPv4 or IPv6): converted to CIDR when the span is an
#     exact CIDR block, i.e. its size is a power of two AND START sits on a
#     multiple of that size. Anything else (odd size, misaligned start,
#     END before START, mixed address families) is rejected.
#   * a value containing '/': returned unchanged as already being CIDR.
#
# Returns the CIDR String, or nil when the value is missing, malformed, or
# not expressible as a single CIDR block (a log entry explains why).
def self.parse_company_network_range(ipapi_data)
  # Try company.network first, then datacenter.network
  network_range = ipapi_data.dig('company', 'network') || ipapi_data.dig('datacenter', 'network')
  return nil if network_range.blank?

  if network_range.include?(' - ')
    start_ip_str, end_ip_str = network_range.split(' - ').map(&:strip)

    begin
      start_ip = IPAddr.new(start_ip_str)
      end_ip = IPAddr.new(end_ip_str)
    rescue IPAddr::Error => e
      # IPAddr::Error covers malformed addresses as well as a missing end
      # address (which IPAddr reports as an address-family error).
      Rails.logger.error "Invalid IP in company network range: #{network_range} (#{e.message})"
      return nil
    end

    if start_ip.family != end_ip.family
      Rails.logger.warn "Network range #{network_range} mixes IPv4 and IPv6 addresses"
      return nil
    end

    address_bits = start_ip.ipv4? ? 32 : 128
    num_ips = end_ip.to_i - start_ip.to_i + 1

    # Integer-only checks: positive size, power of two, start aligned to it.
    cidr_block = num_ips.positive? &&
                 (num_ips & (num_ips - 1)).zero? &&
                 (start_ip.to_i % num_ips).zero?

    unless cidr_block
      Rails.logger.warn "Network range #{network_range} is not a valid CIDR block (#{num_ips} IPs)"
      return nil
    end

    # num_ips == 2**host_bits, so host_bits is bit_length - 1.
    prefix_length = address_bits - (num_ips.bit_length - 1)
    cidr = "#{start_ip_str}/#{prefix_length}"
    Rails.logger.debug "Parsed company network range: #{network_range} -> #{cidr}"
    cidr
  elsif network_range.include?('/')
    # Already in CIDR format
    network_range
  else
    Rails.logger.warn "Unknown network range format: #{network_range}"
    nil
  end
end
|
||||||
|
|
||||||
|
# Copy the descriptive fields of an IPAPI response onto a network range.
#
# network_range - object exposing the asn, asn_org, company, country,
#                 is_datacenter, is_vpn and is_proxy writers.
# ipapi_data    - Hash as returned by IPAPI.
#
# asn_org prefers asn.org and falls back to company.name; the three boolean
# flags default to false when absent. Nothing is saved here.
def self.populate_network_attributes(network_range, ipapi_data)
  company_name = ipapi_data.dig('company', 'name')
  flag = ->(key) { ipapi_data[key] || false }

  network_range.asn = ipapi_data.dig('asn', 'asn')
  network_range.asn_org = ipapi_data.dig('asn', 'org') || company_name
  network_range.company = company_name
  network_range.country = ipapi_data.dig('location', 'country_code')
  network_range.is_datacenter = flag.call('is_datacenter')
  network_range.is_vpn = flag.call('is_vpn')
  network_range.is_proxy = flag.call('is_proxy')
end
|
||||||
|
|
||||||
|
# Process IPAPI data and create/update the network ranges it describes.
#
# ipapi_data       - Hash as returned by IPAPI.
# tracking_network - the network the lookup was made for; must respond to
#                    #cidr.
#
# Up to two NetworkRange records are upserted:
#   1. the company/datacenter allocation from parse_company_network_range;
#   2. the ASN route (asn.route), unless it is blank, equals the tracking
#      network's CIDR, or equals the company allocation already handled.
#
# Returns { networks: [NetworkRange, ...], broadest_cidr: String } where
# broadest_cidr is the first non-blank of: company allocation, ASN route,
# tracking network CIDR.
def self.process_ipapi_data(ipapi_data, tracking_network)
  created_networks = []

  # Extract and create company/datacenter network range if present
  company_network_cidr = parse_company_network_range(ipapi_data).presence
  if company_network_cidr
    company_range = upsert_network_from_ipapi(
      company_network_cidr,
      "Company allocation from IPAPI for #{tracking_network.cidr}",
      ipapi_data
    )
    created_networks << company_range
    Rails.logger.info "Created/updated company network: #{company_range.cidr}"
  end

  # Extract and create ASN route network if present. `presence` turns an
  # empty string into nil so it can never end up as broadest_cidr.
  ipapi_route = ipapi_data.dig('asn', 'route').presence
  if ipapi_route && ipapi_route != tracking_network.cidr && ipapi_route != company_network_cidr
    route_network = upsert_network_from_ipapi(
      ipapi_route,
      "BGP route from IPAPI lookup for #{tracking_network.cidr}",
      ipapi_data
    )
    created_networks << route_network
    Rails.logger.info "Created/updated BGP route network: #{route_network.cidr}"
  end

  # Return both the created networks and the CIDR used for deduplication
  {
    networks: created_networks,
    broadest_cidr: company_network_cidr || ipapi_route || tracking_network.cidr
  }
end

# Find or create the NetworkRange for +cidr+ and refresh it from IPAPI data.
# The source and creation reason are only set on newly created records; the
# descriptive attributes, raw IPAPI payload and fetch time are always updated.
# Returns the saved record; raises whatever save! raises.
def self.upsert_network_from_ipapi(cidr, creation_reason, ipapi_data)
  network = NetworkRange.find_or_create_by(network: cidr) do |nr|
    nr.source = 'api_imported'
    nr.creation_reason = creation_reason
  end

  populate_network_attributes(network, ipapi_data)
  network.set_network_data(:ipapi, ipapi_data)
  network.last_api_fetch = Time.current
  network.save!
  network
end
|
||||||
end
|
end
|
||||||
85
app/services/path_rule_matcher.rb
Normal file
85
app/services/path_rule_matcher.rb
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# PathRuleMatcher - Service to match Events against path_pattern Rules
|
||||||
|
#
|
||||||
|
# This service provides path pattern matching logic for evaluating whether
|
||||||
|
# an event matches a path_pattern rule. Used for hub-side testing and validation
|
||||||
|
# before agent deployment.
|
||||||
|
#
|
||||||
|
# Match Types:
|
||||||
|
# - exact: All segments must match exactly
|
||||||
|
# - prefix: Event path must start with rule segments
|
||||||
|
# - suffix: Event path must end with rule segments
|
||||||
|
# - contains: Rule segments must appear consecutively somewhere in event path
|
||||||
|
class PathRuleMatcher
  # Decide whether +event+ satisfies the path_pattern +rule+.
  #
  # Returns false when the rule is not a path_pattern rule, when either side
  # has no segment ids, or when the rule's match type is not one of
  # exact / prefix / suffix / contains.
  def self.matches?(rule, event)
    return false unless rule.path_pattern_rule?

    event_segments = event.request_segment_ids
    return false if event_segments.blank?

    rule_segments = rule.path_segment_ids
    return false if rule_segments.blank?

    comparator = {
      'exact' => :exact_match?,
      'prefix' => :prefix_match?,
      'suffix' => :suffix_match?,
      'contains' => :contains_match?
    }[rule.path_match_type]

    comparator ? send(comparator, event_segments, rule_segments) : false
  end

  # All active path_pattern rules that match +event+ (empty when the event
  # has no segment ids).
  def self.matching_rules(event)
    return [] if event.request_segment_ids.blank?

    Rule.path_pattern_rules.active.select { |rule| matches?(rule, event) }
  end

  # Action of the first matching rule, or 'allow' when no rule matches (or
  # the matching rule carries no action).
  def self.evaluate(event)
    first_match = matching_rules(event).first
    action = first_match.waf_action if first_match
    action || 'allow'
  end

  # The comparison helpers below are ordinary class methods and remain
  # callable from outside this class.

  # Exact: both segment lists are equal.
  # Example: [1, 2, 3] matches [1, 2, 3] only
  def self.exact_match?(event_segments, rule_segments)
    event_segments == rule_segments
  end

  # Prefix: the event path starts with the rule segments.
  # Example: rule [1, 2] matches events [1, 2], [1, 2, 3], [1, 2, 3, 4]
  def self.prefix_match?(event_segments, rule_segments)
    wanted = rule_segments.length
    return false if event_segments.length < wanted

    event_segments.first(wanted) == rule_segments
  end

  # Suffix: the event path ends with the rule segments.
  # Example: rule [2, 3] matches events [2, 3], [1, 2, 3], [0, 1, 2, 3]
  def self.suffix_match?(event_segments, rule_segments)
    wanted = rule_segments.length
    return false if event_segments.length < wanted

    tail = event_segments[-wanted..-1]
    tail == rule_segments
  end

  # Contains: the rule segments appear consecutively somewhere in the event
  # path.
  # Example: rule [2, 3] matches [1, 2, 3, 4], [2, 3], [0, 2, 3, 5]
  def self.contains_match?(event_segments, rule_segments)
    window = rule_segments.length
    return false if event_segments.length < window
    # A zero-length window matches at position 0 (each_cons rejects size 0).
    return true if window.zero?

    event_segments.each_cons(window).any? { |slice| slice == rule_segments }
  end
end
|
||||||
@@ -302,8 +302,15 @@
|
|||||||
<% @recent_events.first(3).each do |event| %>
|
<% @recent_events.first(3).each do |event| %>
|
||||||
<div class="flex items-center justify-between text-sm">
|
<div class="flex items-center justify-between text-sm">
|
||||||
<div class="flex items-center">
|
<div class="flex items-center">
|
||||||
<div class="w-2 h-2 rounded-full mr-2
|
<% dot_color = case event.waf_action
|
||||||
<%= event.waf_action == 'allow' ? 'bg-green-500' : 'bg-red-500' %>"></div>
|
when 'allow' then 'bg-green-500'
|
||||||
|
when 'deny' then 'bg-red-500'
|
||||||
|
when 'redirect' then 'bg-blue-500'
|
||||||
|
when 'challenge' then 'bg-yellow-500'
|
||||||
|
when 'log' then 'bg-gray-500'
|
||||||
|
else 'bg-gray-500'
|
||||||
|
end %>
|
||||||
|
<div class="w-2 h-2 rounded-full mr-2 <%= dot_color %>"></div>
|
||||||
<span class="text-gray-900 truncate max-w-[120px]"><%= event.ip_address %></span>
|
<span class="text-gray-900 truncate max-w-[120px]"><%= event.ip_address %></span>
|
||||||
</div>
|
</div>
|
||||||
<span class="text-gray-500"><%= time_ago_in_words(event.timestamp) %> ago</span>
|
<span class="text-gray-500"><%= time_ago_in_words(event.timestamp) %> ago</span>
|
||||||
|
|||||||
@@ -119,8 +119,15 @@
|
|||||||
<% @recent_events.first(3).each do |event| %>
|
<% @recent_events.first(3).each do |event| %>
|
||||||
<div class="flex items-center justify-between text-sm">
|
<div class="flex items-center justify-between text-sm">
|
||||||
<div class="flex items-center">
|
<div class="flex items-center">
|
||||||
<div class="w-2 h-2 rounded-full mr-2
|
<% dot_color = case event.waf_action
|
||||||
<%= event.waf_action == 'allow' ? 'bg-green-500' : 'bg-red-500' %>"></div>
|
when 'allow' then 'bg-green-500'
|
||||||
|
when 'deny' then 'bg-red-500'
|
||||||
|
when 'redirect' then 'bg-blue-500'
|
||||||
|
when 'challenge' then 'bg-yellow-500'
|
||||||
|
when 'log' then 'bg-gray-500'
|
||||||
|
else 'bg-gray-500'
|
||||||
|
end %>
|
||||||
|
<div class="w-2 h-2 rounded-full mr-2 <%= dot_color %>"></div>
|
||||||
<span class="text-gray-900 truncate max-w-[120px]"><%= event.ip_address %></span>
|
<span class="text-gray-900 truncate max-w-[120px]"><%= event.ip_address %></span>
|
||||||
</div>
|
</div>
|
||||||
<span class="text-gray-500"><%= time_ago_in_words(event.timestamp) %> ago</span>
|
<span class="text-gray-500"><%= time_ago_in_words(event.timestamp) %> ago</span>
|
||||||
|
|||||||
@@ -141,8 +141,11 @@
|
|||||||
class: "text-blue-600 hover:text-blue-800 hover:underline font-mono font-medium" %>
|
class: "text-blue-600 hover:text-blue-800 hover:underline font-mono font-medium" %>
|
||||||
</div>
|
</div>
|
||||||
<div class="text-xs text-gray-500">
|
<div class="text-xs text-gray-500">
|
||||||
<% if network.country.present? %>
|
<% if network.display_country.present? %>
|
||||||
🏳️ <%= network.country %>
|
🏳️ <%= network.display_country %>
|
||||||
|
<% if network.has_inherited_data? && network.display_country != network.country %>
|
||||||
|
<span class="text-blue-600" title="Inherited from parent network">*</span>
|
||||||
|
<% end %>
|
||||||
<% end %>
|
<% end %>
|
||||||
<% if network.asn.present? %>
|
<% if network.asn.present? %>
|
||||||
• ASN <%= network.asn %>
|
• ASN <%= network.asn %>
|
||||||
@@ -150,7 +153,15 @@
|
|||||||
</div>
|
</div>
|
||||||
</td>
|
</td>
|
||||||
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
|
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
|
||||||
<%= network.company || 'Unknown' %>
|
<div>
|
||||||
|
<%= network.display_company || 'Unknown' %>
|
||||||
|
<% if network.has_inherited_data? %>
|
||||||
|
<div class="text-xs text-blue-600">
|
||||||
|
from <%= link_to network.inherited_from, network_range_path(NetworkRange.find_by(network: network.inherited_from)),
|
||||||
|
class: "text-blue-600 hover:text-blue-800 hover:underline" %>
|
||||||
|
</div>
|
||||||
|
<% end %>
|
||||||
|
</div>
|
||||||
</td>
|
</td>
|
||||||
<td class="px-6 py-4 whitespace-nowrap text-sm">
|
<td class="px-6 py-4 whitespace-nowrap text-sm">
|
||||||
<% if network.is_datacenter? %>
|
<% if network.is_datacenter? %>
|
||||||
|
|||||||
171
app/views/bot_network_ranges/index.html.erb
Normal file
171
app/views/bot_network_ranges/index.html.erb
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
<% content_for :title, "Bot Network Ranges" %>
<%# Bot network range dashboard: per-source import forms, a batch import form,
    recent import records and the most recently imported ranges.
    Reads @bot_sources, @recent_imports and @bot_network_ranges. %>

<div class="max-w-7xl mx-auto px-4 py-8">
  <!-- Header -->
  <div class="mb-8">
    <h1 class="text-3xl font-bold text-gray-900 mb-2">Bot Network Ranges</h1>
    <p class="text-gray-600">Import and manage official network ranges for search crawlers and API bots</p>
  </div>

  <!-- Available Sources -->
  <div class="bg-white shadow rounded-lg mb-8">
    <div class="px-6 py-4 border-b border-gray-200">
      <h2 class="text-lg font-semibold text-gray-900">Available Sources</h2>
    </div>
    <div class="p-6">
      <div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6">
        <% @bot_sources.each do |key, source| %>
          <div class="border rounded-lg p-4 hover:bg-gray-50 transition-colors">
            <div class="flex items-start justify-between mb-2">
              <h3 class="font-medium text-gray-900"><%= source[:name] %></h3>
              <span class="px-2 py-1 text-xs font-medium rounded-full <%= source[:url] ? 'bg-green-100 text-green-800' : 'bg-yellow-100 text-yellow-800' %>">
                <%= source[:url] ? 'Available' : 'Manual' %>
              </span>
            </div>
            <p class="text-sm text-gray-600 mb-4"><%= source[:description] %></p>

            <div class="flex flex-wrap gap-2">
              <%# id: nil keeps the repeated hidden fields from emitting duplicate DOM ids. %>
              <%= form_with url: import_bot_network_ranges_path, method: :post, class: "inline" do |f| %>
                <%= hidden_field_tag :source, key, id: nil %>
                <%= f.submit "Import Now",
                      class: "px-3 py-1 text-xs font-medium text-white bg-blue-600 rounded hover:bg-blue-700 transition-colors disabled:opacity-50",
                      disabled: !source[:url] %>
              <% end %>

              <%= form_with url: import_async_bot_network_ranges_path, method: :post, class: "inline" do |f| %>
                <%= hidden_field_tag :source, key, id: nil %>
                <%= f.submit "Import Async",
                      class: "px-3 py-1 text-xs font-medium text-white bg-purple-600 rounded hover:bg-purple-700 transition-colors disabled:opacity-50",
                      disabled: !source[:url] %>
              <% end %>

              <%= link_to "View", bot_network_range_path(key),
                    class: "px-3 py-1 text-xs font-medium text-gray-700 bg-gray-200 rounded hover:bg-gray-300 transition-colors" %>
            </div>
          </div>
        <% end %>
      </div>
    </div>
  </div>

  <!-- Batch Import -->
  <div class="bg-white shadow rounded-lg mb-8">
    <div class="px-6 py-4 border-b border-gray-200">
      <h2 class="text-lg font-semibold text-gray-900">Batch Import</h2>
    </div>
    <div class="p-6">
      <p class="text-gray-600 mb-4">Import from all available sources (this may take several minutes).</p>

      <%# The confirmation lives on the form as data-turbo-confirm; a bare `confirm:`
          option on the submit button is emitted as an inert HTML attribute. %>
      <%= form_with url: import_all_bot_network_ranges_path, method: :post,
            data: { turbo_confirm: "This will import from all available sources and may take several minutes. Continue?" } do |f| %>
        <div class="flex items-center gap-4">
          <%= f.submit "Import All Sources",
                class: "px-6 py-2 font-medium text-white bg-green-600 rounded hover:bg-green-700 transition-colors" %>
        </div>
      <% end %>
    </div>
  </div>

  <!-- Recent Imports -->
  <% if @recent_imports.any? %>
    <div class="bg-white shadow rounded-lg mb-8">
      <div class="px-6 py-4 border-b border-gray-200">
        <h2 class="text-lg font-semibold text-gray-900">Recent Imports</h2>
      </div>
      <div class="overflow-x-auto">
        <table class="min-w-full divide-y divide-gray-200">
          <thead class="bg-gray-50">
            <tr>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Source</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Status</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Records</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Date</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Notes</th>
            </tr>
          </thead>
          <tbody class="bg-white divide-y divide-gray-200">
            <% @recent_imports.each do |import| %>
              <tr>
                <td class="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
                  <%= import.source.titleize %>
                </td>
                <td class="px-6 py-4 whitespace-nowrap">
                  <span class="px-2 inline-flex text-xs leading-5 font-semibold rounded-full <%= import.status == 'completed' ? 'bg-green-100 text-green-800' : 'bg-yellow-100 text-yellow-800' %>">
                    <%= import.status.titleize %>
                  </span>
                </td>
                <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                  <%= import.records_processed || 0 %>
                </td>
                <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                  <%= import.created_at.strftime('%Y-%m-%d %H:%M') %>
                </td>
                <td class="px-6 py-4 text-sm text-gray-500">
                  <%= import.notes %>
                </td>
              </tr>
            <% end %>
          </tbody>
        </table>
      </div>
    </div>
  <% end %>

  <!-- Recent Bot Network Ranges -->
  <% if @bot_network_ranges.any? %>
    <div class="bg-white shadow rounded-lg">
      <div class="px-6 py-4 border-b border-gray-200">
        <h2 class="text-lg font-semibold text-gray-900">Recently Imported Bot Ranges</h2>
      </div>
      <div class="overflow-x-auto">
        <table class="min-w-full divide-y divide-gray-200">
          <thead class="bg-gray-50">
            <tr>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Network</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Source</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Company</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Created</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Details</th>
            </tr>
          </thead>
          <tbody class="bg-white divide-y divide-gray-200">
            <% @bot_network_ranges.each do |range| %>
              <tr>
                <td class="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
                  <%= range.network %>
                </td>
                <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                  <%= range.source.gsub('bot_import_', '').titleize %>
                </td>
                <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                  <%= range.company || 'Unknown' %>
                </td>
                <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                  <%= range.created_at.strftime('%Y-%m-%d %H:%M') %>
                </td>
                <td class="px-6 py-4 text-sm text-gray-500">
                  <% if range.additional_data.present? %>
                    <%
                      # Only parse failures fall back to an empty hash; a JSON value
                      # that is not an object is ignored so the lookups below are safe.
                      data = begin
                        parsed = JSON.parse(range.additional_data)
                        parsed.is_a?(Hash) ? parsed : {}
                      rescue JSON::ParserError, TypeError
                        {}
                      end
                    %>
                    <% if data['crawler_type'] %>
                      <span class="px-2 py-1 text-xs font-medium rounded bg-blue-100 text-blue-800">
                        <%= data['crawler_type'].to_s.titleize %>
                      </span>
                    <% end %>
                    <% if data['aws_service'] %>
                      <span class="px-2 py-1 text-xs font-medium rounded bg-orange-100 text-orange-800">
                        <%= data['aws_service'] %>
                      </span>
                    <% end %>
                  <% end %>
                </td>
              </tr>
            <% end %>
          </tbody>
        </table>
      </div>
    </div>
  <% end %>
</div>

<!-- Real-time updates via Turbo Streams -->
<%# NOTE(review): presumably this should be `turbo_stream_from` with the stream name
    the import job broadcasts to; a raw turbo-stream-source pointed at /cable does
    not look like an Action Cable subscription. Confirm against the broadcaster. %>
<turbo-stream-source src="/cable" channel="BotImportsChannel"></turbo-stream-source>
|
||||||
175
app/views/bot_network_ranges/show.html.erb
Normal file
175
app/views/bot_network_ranges/show.html.erb
Normal file
@@ -0,0 +1,175 @@
|
|||||||
|
<% content_for :title, "#{@source_name} Network Ranges" %>
<%# Per-source listing of imported bot network ranges with import statistics,
    a paginated table and a delete-all action.
    Reads @source_name, @import_stats, @network_ranges and params[:source]. %>

<div class="max-w-7xl mx-auto px-4 py-8">
  <!-- Header -->
  <div class="mb-8">
    <div class="flex items-center justify-between">
      <div>
        <h1 class="text-3xl font-bold text-gray-900 mb-2"><%= @source_name %> Network Ranges</h1>
        <p class="text-gray-600">Network ranges imported from <%= @source_name %> official sources</p>
      </div>
      <div class="flex space-x-3">
        <%= link_to "Back to Sources", bot_network_ranges_path,
              class: "px-4 py-2 text-sm font-medium text-gray-700 bg-gray-200 rounded hover:bg-gray-300 transition-colors" %>

        <%# The confirmation lives on the form as data-turbo-confirm; a bare `confirm:`
            option on the submit button is emitted as an inert HTML attribute, which
            would let this destructive action run without any prompt. %>
        <%= form_with url: bot_network_range_path(params[:source]), method: :delete, class: "inline",
              data: { turbo_confirm: "Are you sure you want to delete all #{@source_name} network ranges? This action cannot be undone." } do |f| %>
          <%= f.submit "Delete All Ranges",
                class: "px-4 py-2 text-sm font-medium text-white bg-red-600 rounded hover:bg-red-700 transition-colors" %>
        <% end %>
      </div>
    </div>
  </div>

  <!-- Statistics -->
  <% if @import_stats.any? %>
    <div class="bg-white shadow rounded-lg mb-8">
      <div class="px-6 py-4 border-b border-gray-200">
        <h2 class="text-lg font-semibold text-gray-900">Import Statistics</h2>
      </div>
      <div class="p-6">
        <div class="grid grid-cols-1 md:grid-cols-3 gap-6">
          <% @import_stats.each do |source, count| %>
            <div class="text-center">
              <div class="text-3xl font-bold text-blue-600"><%= count %></div>
              <div class="text-sm text-gray-600 mt-1"><%= source.gsub('bot_import_', '').titleize %></div>
            </div>
          <% end %>
        </div>
      </div>
    </div>
  <% end %>

  <% if @network_ranges.empty? %>
    <%# Empty state replaces the table so the page never shows "Showing 1 to 0 of 0". %>
    <div class="bg-white shadow rounded-lg">
      <div class="px-6 py-12 text-center">
        <div class="text-gray-400 mb-4">
          <svg class="mx-auto h-12 w-12 text-gray-400" fill="none" viewBox="0 0 24 24" stroke="currentColor">
            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z" />
          </svg>
        </div>
        <h3 class="text-lg font-medium text-gray-900 mb-2">No network ranges found</h3>
        <p class="text-gray-600 mb-6">
          No <%= @source_name %> network ranges have been imported yet.
        </p>
        <%= link_to "Import #{@source_name} Ranges", bot_network_ranges_path,
              class: "inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md shadow-sm text-white bg-blue-600 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500" %>
      </div>
    </div>
  <% else %>
    <!-- Network Ranges Table -->
    <div class="bg-white shadow rounded-lg">
      <div class="px-6 py-4 border-b border-gray-200">
        <div class="flex items-center justify-between">
          <h2 class="text-lg font-semibold text-gray-900">Network Ranges</h2>
          <div class="text-sm text-gray-500">
            <%# Upper bound is offset + page size, capped at the total for the last page.
                NOTE(review): assumes a Kaminari-style paginated relation (limit_value). %>
            Showing <%= @network_ranges.offset_value + 1 %> to <%= [@network_ranges.offset_value + @network_ranges.limit_value, @network_ranges.total_count].min %>
            of <%= @network_ranges.total_count %> ranges
          </div>
        </div>
      </div>

      <div class="overflow-x-auto">
        <table class="min-w-full divide-y divide-gray-200">
          <thead class="bg-gray-50">
            <tr>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Network</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Source</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Company</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Country</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Created</th>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Details</th>
            </tr>
          </thead>
          <tbody class="bg-white divide-y divide-gray-200">
            <% @network_ranges.each do |range| %>
              <tr class="hover:bg-gray-50">
                <td class="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
                  <%= link_to range.network, network_range_path(range), class: "text-blue-600 hover:text-blue-800" %>
                </td>
                <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                  <%= range.source.gsub('bot_import_', '').titleize %>
                </td>
                <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                  <%= range.company || 'Unknown' %>
                </td>
                <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                  <%= range.country || 'Unknown' %>
                </td>
                <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                  <%= range.created_at.strftime('%Y-%m-%d %H:%M') %>
                </td>
                <td class="px-6 py-4 text-sm text-gray-500">
                  <% if range.additional_data.present? %>
                    <%
                      # Only parse failures fall back to an empty hash; a JSON value
                      # that is not an object is ignored so the lookups below are safe.
                      data = begin
                        parsed = JSON.parse(range.additional_data)
                        parsed.is_a?(Hash) ? parsed : {}
                      rescue JSON::ParserError, TypeError
                        {}
                      end
                    %>
                    <div class="flex flex-wrap gap-1">
                      <% if data['crawler_type'] %>
                        <span class="px-2 py-1 text-xs font-medium rounded bg-blue-100 text-blue-800">
                          <%= data['crawler_type'].to_s.titleize %>
                        </span>
                      <% end %>
                      <% if data['crawler_purpose'] %>
                        <span class="px-2 py-1 text-xs font-medium rounded bg-purple-100 text-purple-800" title="<%= data['crawler_purpose'] %>">
                          Purpose
                        </span>
                      <% end %>
                      <% if data['aws_service'] %>
                        <span class="px-2 py-1 text-xs font-medium rounded bg-orange-100 text-orange-800">
                          <%= data['aws_service'] %>
                        </span>
                      <% end %>
                      <% if data['aws_region'] %>
                        <span class="px-2 py-1 text-xs font-medium rounded bg-green-100 text-green-800">
                          <%= data['aws_region'] %>
                        </span>
                      <% end %>
                      <% if data['ip_version'] %>
                        <span class="px-2 py-1 text-xs font-medium rounded bg-gray-100 text-gray-800">
                          IPv<%= data['ip_version'] %>
                        </span>
                      <% end %>
                    </div>
                  <% end %>
                </td>
              </tr>
            <% end %>
          </tbody>
        </table>
      </div>

      <!-- Pagination -->
      <% if @network_ranges.total_pages > 1 %>
        <div class="px-6 py-4 border-t border-gray-200">
          <div class="flex items-center justify-between">
            <div class="text-sm text-gray-700">
              Page <%= @network_ranges.current_page %> of <%= @network_ranges.total_pages %>
            </div>
            <div class="flex space-x-2">
              <% if @network_ranges.prev_page %>
                <%= link_to "Previous", bot_network_range_path(params[:source], page: @network_ranges.prev_page),
                      class: "px-3 py-1 text-sm font-medium text-gray-700 bg-white border border-gray-300 rounded hover:bg-gray-50" %>
              <% end %>

              <%# Show page numbers: first, last, and up to two either side of the current page %>
              <% (1..@network_ranges.total_pages).select { |p| p == 1 || p == @network_ranges.total_pages || (p - @network_ranges.current_page).abs <= 2 }.each do |page| %>
                <% if page == @network_ranges.current_page %>
                  <span class="px-3 py-1 text-sm font-medium text-white bg-blue-600 rounded">
                    <%= page %>
                  </span>
                <% else %>
                  <%= link_to page, bot_network_range_path(params[:source], page: page),
                        class: "px-3 py-1 text-sm font-medium text-gray-700 bg-white border border-gray-300 rounded hover:bg-gray-50" %>
                <% end %>
              <% end %>

              <% if @network_ranges.next_page %>
                <%= link_to "Next", bot_network_range_path(params[:source], page: @network_ranges.next_page),
                      class: "px-3 py-1 text-sm font-medium text-gray-700 bg-white border border-gray-300 rounded hover:bg-gray-50" %>
              <% end %>
            </div>
          </div>
        </div>
      <% end %>
    </div>
  <% end %>
</div>
|
||||||
@@ -25,7 +25,7 @@
|
|||||||
<div>
|
<div>
|
||||||
<%= form.label :waf_action, "Action", class: "block text-sm font-medium text-gray-700" %>
|
<%= form.label :waf_action, "Action", class: "block text-sm font-medium text-gray-700" %>
|
||||||
<%= form.select :waf_action,
|
<%= form.select :waf_action,
|
||||||
options_for_select([['All', ''], ['Allow', 'allow'], ['Deny', 'deny'], ['Redirect', 'redirect'], ['Challenge', 'challenge']], params[:waf_action]),
|
options_for_select([['All', ''], ['Allow', 'allow'], ['Deny', 'deny'], ['Redirect', 'redirect'], ['Challenge', 'challenge'], ['Log', 'log']], params[:waf_action]),
|
||||||
{ }, { class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm" } %>
|
{ }, { class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm" } %>
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
@@ -77,6 +77,20 @@
|
|||||||
placeholder: "e.g., 192.168.1.0/24" %>
|
placeholder: "e.g., 192.168.1.0/24" %>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<!-- Bot Filtering -->
|
||||||
|
<div class="mt-4 flex items-center">
|
||||||
|
<div class="flex items-center h-5">
|
||||||
|
<%= form.check_box :exclude_bots,
|
||||||
|
{ checked: params[:exclude_bots] == "true", class: "h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded" },
|
||||||
|
"true", "false" %>
|
||||||
|
</div>
|
||||||
|
<div class="ml-3 text-sm">
|
||||||
|
<%= form.label :exclude_bots, class: "font-medium text-gray-700" do %>
|
||||||
|
Human Traffic Only
|
||||||
|
<span class="font-normal text-gray-500">(Exclude known bots and crawlers)</span>
|
||||||
|
<% end %>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<% end %>
|
<% end %>
|
||||||
</div>
|
</div>
|
||||||
@@ -178,9 +192,10 @@
|
|||||||
when 'deny' then 'bg-red-100 text-red-800'
|
when 'deny' then 'bg-red-100 text-red-800'
|
||||||
when 'redirect' then 'bg-blue-100 text-blue-800'
|
when 'redirect' then 'bg-blue-100 text-blue-800'
|
||||||
when 'challenge' then 'bg-yellow-100 text-yellow-800'
|
when 'challenge' then 'bg-yellow-100 text-yellow-800'
|
||||||
|
when 'add_header' then 'bg-purple-100 text-purple-800'
|
||||||
else 'bg-gray-100 text-gray-800'
|
else 'bg-gray-100 text-gray-800'
|
||||||
end %>">
|
end %>">
|
||||||
<%= event.waf_action %>
|
<%= event.waf_action.humanize %>
|
||||||
</span>
|
</span>
|
||||||
</td>
|
</td>
|
||||||
<td class="px-6 py-4 text-sm font-mono text-gray-900">
|
<td class="px-6 py-4 text-sm font-mono text-gray-900">
|
||||||
|
|||||||
@@ -75,6 +75,8 @@
|
|||||||
class: nav_link_class(network_ranges_path) %>
|
class: nav_link_class(network_ranges_path) %>
|
||||||
|
|
||||||
<% if user_signed_in? && current_user_admin? %>
|
<% if user_signed_in? && current_user_admin? %>
|
||||||
|
<%= link_to "🤖 Bot Ranges", bot_network_ranges_path,
|
||||||
|
class: nav_link_class(bot_network_ranges_path) %>
|
||||||
<%= link_to "📊 Data Imports", data_imports_path,
|
<%= link_to "📊 Data Imports", data_imports_path,
|
||||||
class: nav_link_class(data_imports_path) %>
|
class: nav_link_class(data_imports_path) %>
|
||||||
<%= link_to "🔗 DSNs", dsns_path,
|
<%= link_to "🔗 DSNs", dsns_path,
|
||||||
@@ -172,6 +174,8 @@
|
|||||||
class: mobile_nav_link_class(network_ranges_path) %>
|
class: mobile_nav_link_class(network_ranges_path) %>
|
||||||
|
|
||||||
<% if user_signed_in? && current_user_admin? %>
|
<% if user_signed_in? && current_user_admin? %>
|
||||||
|
<%= link_to "🤖 Bot Ranges", bot_network_ranges_path,
|
||||||
|
class: mobile_nav_link_class(bot_network_ranges_path) %>
|
||||||
<%= link_to "📊 Data Imports", data_imports_path,
|
<%= link_to "📊 Data Imports", data_imports_path,
|
||||||
class: mobile_nav_link_class(data_imports_path) %>
|
class: mobile_nav_link_class(data_imports_path) %>
|
||||||
<%= link_to "🔗 DSNs", dsns_path,
|
<%= link_to "🔗 DSNs", dsns_path,
|
||||||
|
|||||||
@@ -711,7 +711,15 @@
|
|||||||
</div>
|
</div>
|
||||||
</td>
|
</td>
|
||||||
<td class="px-6 py-4 whitespace-nowrap">
|
<td class="px-6 py-4 whitespace-nowrap">
|
||||||
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium <%= event.waf_action == 'deny' ? 'bg-red-100 text-red-800' : 'bg-green-100 text-green-800' %>">
|
<% action_classes = case event.waf_action
|
||||||
|
when 'deny' then 'bg-red-100 text-red-800'
|
||||||
|
when 'allow' then 'bg-green-100 text-green-800'
|
||||||
|
when 'redirect' then 'bg-blue-100 text-blue-800'
|
||||||
|
when 'challenge' then 'bg-yellow-100 text-yellow-800'
|
||||||
|
when 'log' then 'bg-gray-100 text-gray-800'
|
||||||
|
else 'bg-gray-100 text-gray-800'
|
||||||
|
end %>
|
||||||
|
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium <%= action_classes %>">
|
||||||
<%= event.waf_action %>
|
<%= event.waf_action %>
|
||||||
</span>
|
</span>
|
||||||
</td>
|
</td>
|
||||||
|
|||||||
@@ -3,7 +3,15 @@
|
|||||||
<div class="flex items-center space-x-2 min-w-0 flex-1">
|
<div class="flex items-center space-x-2 min-w-0 flex-1">
|
||||||
<%= link_to rule, class: "flex items-center space-x-2 min-w-0 hover:text-blue-600" do %>
|
<%= link_to rule, class: "flex items-center space-x-2 min-w-0 hover:text-blue-600" do %>
|
||||||
<%# Action badge %>
|
<%# Action badge %>
|
||||||
<span class="inline-flex items-center px-2 py-0.5 rounded text-xs font-medium <%= rule.waf_action == 'deny' ? 'bg-red-100 text-red-800' : rule.waf_action == 'allow' ? 'bg-green-100 text-green-800' : 'bg-blue-100 text-blue-800' %>">
|
<% action_classes = case rule.waf_action
|
||||||
|
when 'deny' then 'bg-red-100 text-red-800'
|
||||||
|
when 'allow' then 'bg-green-100 text-green-800'
|
||||||
|
when 'redirect' then 'bg-blue-100 text-blue-800'
|
||||||
|
when 'challenge' then 'bg-yellow-100 text-yellow-800'
|
||||||
|
when 'log' then 'bg-gray-100 text-gray-800'
|
||||||
|
else 'bg-gray-100 text-gray-800'
|
||||||
|
end %>
|
||||||
|
<span class="inline-flex items-center px-2 py-0.5 rounded text-xs font-medium <%= action_classes %>">
|
||||||
<%= rule.waf_action.upcase %>
|
<%= rule.waf_action.upcase %>
|
||||||
</span>
|
</span>
|
||||||
|
|
||||||
|
|||||||
@@ -225,14 +225,16 @@
|
|||||||
<td class="px-6 py-4 whitespace-nowrap text-right text-sm font-medium">
|
<td class="px-6 py-4 whitespace-nowrap text-right text-sm font-medium">
|
||||||
<%= link_to "View", rule_path(rule), class: "text-blue-600 hover:text-blue-900 mr-3" %>
|
<%= link_to "View", rule_path(rule), class: "text-blue-600 hover:text-blue-900 mr-3" %>
|
||||||
<% if rule.enabled? %>
|
<% if rule.enabled? %>
|
||||||
<%= link_to "Disable", disable_rule_path(rule),
|
<%= button_to "Disable", disable_rule_path(rule),
|
||||||
method: :post,
|
method: :post,
|
||||||
data: { confirm: "Are you sure you want to disable this rule?" },
|
form: { style: "display: inline;" },
|
||||||
class: "text-yellow-600 hover:text-yellow-900 mr-3" %>
|
data: { turbo_confirm: "Are you sure you want to disable this rule?" },
|
||||||
|
class: "text-yellow-600 hover:text-yellow-900 mr-3 bg-transparent border-0 p-0 cursor-pointer" %>
|
||||||
<% else %>
|
<% else %>
|
||||||
<%= link_to "Enable", enable_rule_path(rule),
|
<%= button_to "Enable", enable_rule_path(rule),
|
||||||
method: :post,
|
method: :post,
|
||||||
class: "text-green-600 hover:text-green-900 mr-3" %>
|
form: { style: "display: inline;" },
|
||||||
|
class: "text-green-600 hover:text-green-900 mr-3 bg-transparent border-0 p-0 cursor-pointer" %>
|
||||||
<% end %>
|
<% end %>
|
||||||
<%= link_to "Edit", edit_rule_path(rule), class: "text-indigo-600 hover:text-indigo-900" %>
|
<%= link_to "Edit", edit_rule_path(rule), class: "text-indigo-600 hover:text-indigo-900" %>
|
||||||
</td>
|
</td>
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
<p class="mt-2 text-gray-600">Create a WAF rule to allow, block, or rate limit traffic</p>
|
<p class="mt-2 text-gray-600">Create a WAF rule to allow, block, or rate limit traffic</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="bg-white shadow rounded-lg">
|
<div class="bg-white shadow rounded-lg" data-controller="rule-form">
|
||||||
<%= form_with(model: @rule, local: true, class: "space-y-6") do |form| %>
|
<%= form_with(model: @rule, local: true, class: "space-y-6") do |form| %>
|
||||||
<% if @rule.errors.any? %>
|
<% if @rule.errors.any? %>
|
||||||
<div class="rounded-md bg-red-50 p-4">
|
<div class="rounded-md bg-red-50 p-4">
|
||||||
@@ -54,7 +54,8 @@
|
|||||||
<%= form.select :waf_action,
|
<%= form.select :waf_action,
|
||||||
options_for_select(@waf_actions.map { |action, _| [action.humanize, action] }, @rule.waf_action),
|
options_for_select(@waf_actions.map { |action, _| [action.humanize, action] }, @rule.waf_action),
|
||||||
{ prompt: "Select action" },
|
{ prompt: "Select action" },
|
||||||
{ class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm" } %>
|
{ class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm",
|
||||||
|
data: { rule_form_target: "actionSelect", action: "change->rule-form#updateActionSections" } } %>
|
||||||
<p class="mt-2 text-sm text-gray-500">What action to take when this rule matches</p>
|
<p class="mt-2 text-sm text-gray-500">What action to take when this rule matches</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -197,10 +198,18 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div>
|
<div>
|
||||||
<%= form.label :expires_at, "Expires At", class: "block text-sm font-medium text-gray-700" %>
|
<div class="flex items-center mb-2">
|
||||||
<%= form.datetime_local_field :expires_at,
|
<%= check_box_tag :set_expiration, "1", false,
|
||||||
class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm" %>
|
class: "h-4 w-4 rounded border-gray-300 text-blue-600 focus:ring-blue-500",
|
||||||
<p class="mt-2 text-sm text-gray-500">Leave blank for permanent rule</p>
|
data: { rule_form_target: "expirationCheckbox", action: "change->rule-form#toggleExpiration" } %>
|
||||||
|
<%= label_tag :set_expiration, "Set expiration", class: "ml-2 block text-sm font-medium text-gray-700" %>
|
||||||
|
</div>
|
||||||
|
<div class="hidden" data-rule-form-target="expirationField">
|
||||||
|
<%= form.label :expires_at, "Expires At", class: "block text-sm font-medium text-gray-700" %>
|
||||||
|
<%= form.datetime_local_field :expires_at,
|
||||||
|
class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm" %>
|
||||||
|
<p class="mt-2 text-sm text-gray-500">When this rule should automatically expire</p>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="flex items-center pt-6">
|
<div class="flex items-center pt-6">
|
||||||
|
|||||||
@@ -39,12 +39,12 @@
|
|||||||
<div class="flex space-x-3">
|
<div class="flex space-x-3">
|
||||||
<%= link_to "Edit", edit_rule_path(@rule), class: "inline-flex items-center px-4 py-2 border border-gray-300 rounded-md shadow-sm text-sm font-medium text-gray-700 bg-white hover:bg-gray-50" %>
|
<%= link_to "Edit", edit_rule_path(@rule), class: "inline-flex items-center px-4 py-2 border border-gray-300 rounded-md shadow-sm text-sm font-medium text-gray-700 bg-white hover:bg-gray-50" %>
|
||||||
<% if @rule.enabled? %>
|
<% if @rule.enabled? %>
|
||||||
<%= link_to "Disable", disable_rule_path(@rule),
|
<%= button_to "Disable", disable_rule_path(@rule),
|
||||||
method: :post,
|
method: :post,
|
||||||
data: { confirm: "Are you sure you want to disable this rule?" },
|
data: { turbo_confirm: "Are you sure you want to disable this rule?" },
|
||||||
class: "inline-flex items-center px-4 py-2 border border-yellow-300 rounded-md shadow-sm text-sm font-medium text-yellow-700 bg-yellow-50 hover:bg-yellow-100" %>
|
class: "inline-flex items-center px-4 py-2 border border-yellow-300 rounded-md shadow-sm text-sm font-medium text-yellow-700 bg-yellow-50 hover:bg-yellow-100" %>
|
||||||
<% else %>
|
<% else %>
|
||||||
<%= link_to "Enable", enable_rule_path(@rule),
|
<%= button_to "Enable", enable_rule_path(@rule),
|
||||||
method: :post,
|
method: :post,
|
||||||
class: "inline-flex items-center px-4 py-2 border border-green-300 rounded-md shadow-sm text-sm font-medium text-green-700 bg-green-50 hover:bg-green-100" %>
|
class: "inline-flex items-center px-4 py-2 border border-green-300 rounded-md shadow-sm text-sm font-medium text-green-700 bg-green-50 hover:bg-green-100" %>
|
||||||
<% end %>
|
<% end %>
|
||||||
|
|||||||
@@ -36,7 +36,7 @@
|
|||||||
</div>
|
</div>
|
||||||
<% end %>
|
<% end %>
|
||||||
|
|
||||||
<%= form_with url: session_url, class: "contents" do |form| %>
|
<%= form_with url: session_url, class: "contents", data: { turbo: false } do |form| %>
|
||||||
<div class="my-5">
|
<div class="my-5">
|
||||||
<%= form.email_field :email_address, required: true, autofocus: true, autocomplete: "username", placeholder: "Enter your email address", value: params[:email_address], class: "block shadow-sm rounded-md border border-gray-400 focus:outline-blue-600 px-3 py-2 mt-2 w-full" %>
|
<%= form.email_field :email_address, required: true, autofocus: true, autocomplete: "username", placeholder: "Enter your email address", value: params[:email_address], class: "block shadow-sm rounded-md border border-gray-400 focus:outline-blue-600 px-3 py-2 mt-2 w-full" %>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -50,11 +50,37 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Future Settings Section -->
|
<!-- Data Retention Settings -->
|
||||||
<div class="mt-6 bg-gray-50 shadow sm:rounded-lg">
|
<div class="mt-6 bg-white shadow sm:rounded-lg">
|
||||||
<div class="px-4 py-5 sm:p-6">
|
<div class="px-4 py-5 sm:p-6">
|
||||||
<h3 class="text-lg font-medium leading-6 text-gray-900 mb-2">Additional Settings</h3>
|
<h3 class="text-lg font-medium leading-6 text-gray-900 mb-4">Data Retention</h3>
|
||||||
<p class="text-sm text-gray-500">More configuration options will be added here as needed.</p>
|
|
||||||
|
<div class="mb-6">
|
||||||
|
<%= form_with url: settings_path, method: :patch, class: "space-y-4" do |f| %>
|
||||||
|
<%= hidden_field_tag :key, 'event_retention_days' %>
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<label for="event_retention_days" class="block text-sm font-medium text-gray-700">
|
||||||
|
Event Retention Period (days)
|
||||||
|
</label>
|
||||||
|
<div class="mt-1 flex rounded-md shadow-sm">
|
||||||
|
<%= number_field_tag :value,
|
||||||
|
@settings['event_retention_days']&.value || 90,
|
||||||
|
class: "flex-1 min-w-0 block w-full px-3 py-2 rounded-md border-gray-300 focus:ring-blue-500 focus:border-blue-500 sm:text-sm",
|
||||||
|
placeholder: "90",
|
||||||
|
min: 0 %>
|
||||||
|
<%= f.submit "Update", class: "ml-3 inline-flex items-center px-4 py-2 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-blue-600 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500" %>
|
||||||
|
</div>
|
||||||
|
<p class="mt-2 text-sm text-gray-500">
|
||||||
|
Events older than this many days will be automatically deleted by the cleanup job (runs hourly).
|
||||||
|
Set to 0 to disable automatic cleanup. Default: 90 days.
|
||||||
|
</p>
|
||||||
|
<p class="mt-1 text-xs text-gray-400">
|
||||||
|
Current setting: <strong><%= Setting.event_retention_days %> days</strong>
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<% end %>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
6
config/initializers/device_detector.rb
Normal file
6
config/initializers/device_detector.rb
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# DeviceDetector cache sizing.
# The gem's default cap is 5,000 entries; it is raised to 10,000 here for a better hit rate.
# Estimated memory cost (per the original author): ~1-2MB for 10k cached user agents.
DeviceDetector.configure do |config|
  config.max_cache_keys = 10_000
end
|
||||||
5
config/initializers/version.rb
Normal file
5
config/initializers/version.rb
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# Top-level application namespace.
module BaffleHub
  # Release identifier of the application.
  VERSION = '0.3.0'
end
|
||||||
@@ -12,14 +12,26 @@
|
|||||||
# No recurring tasks configured yet
|
# No recurring tasks configured yet
|
||||||
# (previously had clear_solid_queue_finished_jobs, but now preserve_finished_jobs: false in queue.yml)
|
# (previously had clear_solid_queue_finished_jobs, but now preserve_finished_jobs: false in queue.yml)
|
||||||
|
|
||||||
# Backfill network intelligence for recent events (catches events before network data imported)
|
|
||||||
backfill_recent_network_intelligence:
|
|
||||||
class: BackfillRecentNetworkIntelligenceJob
|
|
||||||
queue: default
|
|
||||||
schedule: every 5 minutes
|
|
||||||
|
|
||||||
# Clean up failed jobs older than 1 day
|
# Clean up failed jobs older than 1 day
|
||||||
cleanup_failed_jobs:
|
cleanup_failed_jobs:
|
||||||
command: "SolidQueue::FailedExecution.where('created_at < ?', 1.day.ago).delete_all"
|
command: "SolidQueue::FailedExecution.where('created_at < ?', 1.day.ago).delete_all"
|
||||||
queue: background
|
queue: background
|
||||||
schedule: every 6 hours
|
schedule: every 6 hours
|
||||||
|
|
||||||
|
# Disable expired rules automatically
|
||||||
|
expired_rules_cleanup:
|
||||||
|
class: ExpiredRulesCleanupJob
|
||||||
|
queue: default
|
||||||
|
schedule: every hour
|
||||||
|
|
||||||
|
# Clean up old events based on retention setting
|
||||||
|
cleanup_old_events:
|
||||||
|
class: CleanupOldEventsJob
|
||||||
|
queue: background
|
||||||
|
schedule: every hour
|
||||||
|
|
||||||
|
# Sync events from PostgreSQL to DuckDB for fast analytics
|
||||||
|
sync_events_to_duckdb:
|
||||||
|
class: SyncEventsToDuckdbJob
|
||||||
|
queue: default
|
||||||
|
schedule: every 1 minutes
|
||||||
|
|||||||
@@ -93,4 +93,16 @@ Rails.application.routes.draw do
|
|||||||
get :progress
|
get :progress
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Bot network range management (admin only)
|
||||||
|
resources :bot_network_ranges, only: [:index, :show] do
|
||||||
|
collection do
|
||||||
|
post :import
|
||||||
|
post :import_async
|
||||||
|
post :import_all
|
||||||
|
end
|
||||||
|
member do
|
||||||
|
delete :destroy
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
151
db/migrate/20251116025003_align_waf_action_enums.rb
Normal file
151
db/migrate/20251116025003_align_waf_action_enums.rb
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
# Renumbers the integer-backed +waf_action+ column on +rules+ and +events+.
#
# rules, before:  allow: 0, deny: 1, rate_limit: 2, redirect: 3, log: 4, challenge: 5
# rules, after:   deny: 0, allow: 1, redirect: 2, challenge: 3, log: 4
# events:         allow (0) and deny (1) trade places; redirect (2) and
#                 challenge (3) keep their values.
#
# Every table is rewritten in two passes: values are first parked in a
# temporary 100+ range and then moved to their final position.
#
# Reversibility: +up+ folds rate_limit (2) rules into deny (0). After that the
# two are indistinguishable, so +down+ restores such rows as deny (1 in the old
# numbering); the original rate_limit value cannot be recovered.
class AlignWafActionEnums < ActiveRecord::Migration[8.1]
  def up
    say "Aligning WAF action enums to canonical order (deny:0, allow:1, redirect:2, challenge:3, log:4)"

    # === Rules Table ===
    say_with_time "Updating rules table..." do
      # Temporarily disable triggers to avoid FK constraint issues during enum swap
      # NOTE(review): changing session_replication_role usually needs elevated
      # database privileges -- confirm the migration role has them.
      execute "SET session_replication_role = replica;"

      # Step 1: Move existing values to temporary range (100+)
      execute <<-SQL
        UPDATE rules
        SET waf_action = CASE
          WHEN waf_action = 0 THEN 100 -- allow -> temp(100)
          WHEN waf_action = 1 THEN 101 -- deny -> temp(101)
          WHEN waf_action = 2 THEN 102 -- rate_limit -> temp(102)
          WHEN waf_action = 3 THEN 103 -- redirect -> temp(103)
          WHEN waf_action = 4 THEN 104 -- log -> temp(104)
          WHEN waf_action = 5 THEN 105 -- challenge -> temp(105)
          ELSE waf_action
        END
      SQL

      # Step 2: Move from temporary to final positions
      execute <<-SQL
        UPDATE rules
        SET waf_action = CASE
          WHEN waf_action = 101 THEN 0 -- deny -> 0
          WHEN waf_action = 100 THEN 1 -- allow -> 1
          WHEN waf_action = 103 THEN 2 -- redirect -> 2
          WHEN waf_action = 105 THEN 3 -- challenge -> 3
          WHEN waf_action = 104 THEN 4 -- log -> 4
          WHEN waf_action = 102 THEN 0 -- rate_limit -> deny (rate_limit is a rule_type, not action)
          ELSE waf_action
        END
      SQL

      # Re-enable triggers
      execute "SET session_replication_role = DEFAULT;"

      # Return count without triggering model validations
      connection.execute("SELECT COUNT(*) FROM rules").first["count"]
    end

    # === Events Table ===
    say_with_time "Updating events table..." do
      # Step 1: Move existing values to temporary range (100+)
      execute <<-SQL
        UPDATE events
        SET waf_action = CASE
          WHEN waf_action = 0 THEN 100 -- allow -> temp(100)
          WHEN waf_action = 1 THEN 101 -- deny -> temp(101)
          WHEN waf_action = 2 THEN 102 -- redirect -> temp(102)
          WHEN waf_action = 3 THEN 103 -- challenge -> temp(103)
          ELSE waf_action
        END
      SQL

      # Step 2: Move from temporary to final positions
      execute <<-SQL
        UPDATE events
        SET waf_action = CASE
          WHEN waf_action = 101 THEN 0 -- deny -> 0
          WHEN waf_action = 100 THEN 1 -- allow -> 1
          WHEN waf_action = 102 THEN 2 -- redirect -> 2
          WHEN waf_action = 103 THEN 3 -- challenge -> 3
          ELSE waf_action
        END
      SQL

      # Return count without triggering model validations
      connection.execute("SELECT COUNT(*) FROM events").first["count"]
    end

    say "Enum alignment complete!", true
  end

  def down
    # Reverse the migration - swap back to old order
    say "Reverting WAF action enums to original order"

    # === Rules Table ===
    say_with_time "Reverting rules table..." do
      # Step 1: park the current (post-migration) values in the temporary range
      execute <<-SQL
        UPDATE rules
        SET waf_action = CASE
          WHEN waf_action = 0 THEN 100 -- deny -> temp(100)
          WHEN waf_action = 1 THEN 101 -- allow -> temp(101)
          WHEN waf_action = 2 THEN 102 -- redirect -> temp(102)
          WHEN waf_action = 3 THEN 103 -- challenge -> temp(103)
          WHEN waf_action = 4 THEN 104 -- log -> temp(104)
          ELSE waf_action
        END
      SQL

      # Step 2: move each temporary value to its pre-migration number.
      # The temp values follow the step 1 mapping above, so redirect is 102
      # and challenge is 103. Former rate_limit rules were merged into deny by
      # `up` and therefore come back as deny.
      execute <<-SQL
        UPDATE rules
        SET waf_action = CASE
          WHEN waf_action = 101 THEN 0 -- allow -> 0
          WHEN waf_action = 100 THEN 1 -- deny -> 1
          WHEN waf_action = 102 THEN 3 -- redirect -> 3
          WHEN waf_action = 104 THEN 4 -- log -> 4
          WHEN waf_action = 103 THEN 5 -- challenge -> 5
          ELSE waf_action
        END
      SQL

      # Return count without triggering model validations
      connection.execute("SELECT COUNT(*) FROM rules").first["count"]
    end

    # === Events Table ===
    say_with_time "Reverting events table..." do
      execute <<-SQL
        UPDATE events
        SET waf_action = CASE
          WHEN waf_action = 0 THEN 100 -- deny -> temp(100)
          WHEN waf_action = 1 THEN 101 -- allow -> temp(101)
          WHEN waf_action = 2 THEN 102 -- redirect -> temp(102)
          WHEN waf_action = 3 THEN 103 -- challenge -> temp(103)
          ELSE waf_action
        END
      SQL

      execute <<-SQL
        UPDATE events
        SET waf_action = CASE
          WHEN waf_action = 101 THEN 0 -- allow -> 0
          WHEN waf_action = 100 THEN 1 -- deny -> 1
          WHEN waf_action = 102 THEN 2 -- redirect -> 2
          WHEN waf_action = 103 THEN 3 -- challenge -> 3
          ELSE waf_action
        END
      SQL

      # Return count without triggering model validations
      connection.execute("SELECT COUNT(*) FROM events").first["count"]
    end

    say "Revert complete!", true
  end
end
|
||||||
6
db/migrate/20251118071813_add_is_bot_to_events.rb
Normal file
6
db/migrate/20251118071813_add_is_bot_to_events.rb
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
# Adds a non-null boolean `is_bot` column (default false) to `events`,
# plus an index on that column.
class AddIsBotToEvents < ActiveRecord::Migration[8.1]
  def change
    change_table :events do |t|
      t.boolean :is_bot, default: false, null: false
      t.index :is_bot
    end
  end
end
|
||||||
@@ -0,0 +1,39 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# Re-labels add_header rules as allow rules.
#
# Before:
#   waf_action: add_header (5)
#   metadata: { header_name: "X-Bot-Agent", header_value: "googlebot" }
#
# Intended end state:
#   waf_action: allow (1)
#   metadata: {
#     headers: { "X-Bot-Agent" => "googlebot" },
#     tags: ["bot:googlebot"]
#   }
#
# Only waf_action is changed by this migration; metadata is left untouched.
# Per the original note, the new metadata shape is expected to be handled by
# Rule helper methods.
class MigrateAddHeaderRulesToAllowWithTags < ActiveRecord::Migration[8.1]
  # Turns every add_header (5) rule into an allow (1) rule.
  def up
    relabel_as_allow = <<-SQL
      UPDATE rules
      SET waf_action = 1 -- allow
      WHERE waf_action = 5 -- add_header
    SQL

    execute(relabel_as_allow)
  end

  # Conservative rollback: only allow rules whose metadata carries
  # header_name and header_value, and has no headers key, are switched back.
  def down
    relabel_as_add_header = <<-SQL
      UPDATE rules
      SET waf_action = 5 -- add_header
      WHERE waf_action = 1 -- allow
      AND metadata ? 'header_name'
      AND metadata ? 'header_value'
      AND NOT metadata ? 'headers'
    SQL

    execute(relabel_as_add_header)
  end
end
|
||||||
@@ -10,7 +10,7 @@
|
|||||||
#
|
#
|
||||||
# It's strongly recommended that you check this file into your version control system.
|
# It's strongly recommended that you check this file into your version control system.
|
||||||
|
|
||||||
ActiveRecord::Schema[8.1].define(version: 2025_11_13_052831) do
|
ActiveRecord::Schema[8.1].define(version: 2025_11_20_003554) do
|
||||||
# These are extensions that must be enabled in order to support this database
|
# These are extensions that must be enabled in order to support this database
|
||||||
enable_extension "pg_catalog.plpgsql"
|
enable_extension "pg_catalog.plpgsql"
|
||||||
|
|
||||||
@@ -80,6 +80,7 @@ ActiveRecord::Schema[8.1].define(version: 2025_11_13_052831) do
|
|||||||
t.datetime "created_at", null: false
|
t.datetime "created_at", null: false
|
||||||
t.string "environment"
|
t.string "environment"
|
||||||
t.inet "ip_address"
|
t.inet "ip_address"
|
||||||
|
t.boolean "is_bot", default: false, null: false
|
||||||
t.boolean "is_datacenter", default: false, null: false
|
t.boolean "is_datacenter", default: false, null: false
|
||||||
t.boolean "is_proxy", default: false, null: false
|
t.boolean "is_proxy", default: false, null: false
|
||||||
t.boolean "is_vpn", default: false, null: false
|
t.boolean "is_vpn", default: false, null: false
|
||||||
@@ -105,6 +106,7 @@ ActiveRecord::Schema[8.1].define(version: 2025_11_13_052831) do
|
|||||||
t.index ["company"], name: "index_events_on_company"
|
t.index ["company"], name: "index_events_on_company"
|
||||||
t.index ["country"], name: "index_events_on_country"
|
t.index ["country"], name: "index_events_on_country"
|
||||||
t.index ["ip_address"], name: "index_events_on_ip_address"
|
t.index ["ip_address"], name: "index_events_on_ip_address"
|
||||||
|
t.index ["is_bot"], name: "index_events_on_is_bot"
|
||||||
t.index ["is_datacenter", "is_vpn", "is_proxy"], name: "index_events_on_network_flags"
|
t.index ["is_datacenter", "is_vpn", "is_proxy"], name: "index_events_on_network_flags"
|
||||||
t.index ["network_range_id"], name: "index_events_on_network_range_id"
|
t.index ["network_range_id"], name: "index_events_on_network_range_id"
|
||||||
t.index ["request_host_id", "request_method", "request_segment_ids"], name: "idx_events_host_method_path"
|
t.index ["request_host_id", "request_method", "request_segment_ids"], name: "idx_events_host_method_path"
|
||||||
|
|||||||
@@ -437,7 +437,7 @@ Ipv4Range.upsert({
|
|||||||
network_start: cidr.to_i,
|
network_start: cidr.to_i,
|
||||||
network_end: cidr.to_range.end.to_i,
|
network_end: cidr.to_range.end.to_i,
|
||||||
network_prefix: 8,
|
network_prefix: 8,
|
||||||
waf_action: 1, # deny
|
waf_action: 0, # deny
|
||||||
priority: 8
|
priority: 8
|
||||||
}, unique_by: :source)
|
}, unique_by: :source)
|
||||||
|
|
||||||
|
|||||||
127
lib/tasks/duckdb.rake
Normal file
127
lib/tasks/duckdb.rake
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# Maintenance tasks for the DuckDB analytics database stored at
# storage/analytics.duckdb.
namespace :duckdb do
  desc "Rebuild DuckDB analytics database from scratch"
  task rebuild: :environment do
    puts "=" * 80
    puts "DuckDB Rebuild"
    puts "=" * 80
    puts

    duckdb_path = Rails.root.join("storage", "analytics.duckdb")

    # Step 1: remove any existing database file so the sync starts from nothing
    if File.exist?(duckdb_path)
      puts "🗑️ Deleting existing DuckDB database..."
      File.delete(duckdb_path)
      puts " ✅ Deleted: #{duckdb_path}"
      puts
    else
      puts "ℹ️ No existing DuckDB database found"
      puts
    end

    # Step 2: Rebuild from PostgreSQL
    puts "🔨 Rebuilding DuckDB from PostgreSQL events..."
    puts

    start_time = Time.current
    begin
      SyncEventsToDuckdbJob.perform_now
      duration = Time.current - start_time

      # Step 3: Verify the rebuild
      event_count = AnalyticsDuckdbService.instance.event_count
      bot_count = AnalyticsDuckdbService.instance.with_connection do |conn|
        result = conn.query("SELECT COUNT(*) FROM events WHERE is_bot = true")
        result.first&.first || 0
      end

      puts "=" * 80
      puts "✅ DuckDB Rebuild Complete!"
      puts "=" * 80
      puts " Duration: #{duration.round(2)}s"
      puts " Total events synced: #{event_count}"
      # Percentages are only meaningful (and only computable) with at least one event.
      if event_count > 0
        human_count = event_count - bot_count
        puts " Bot events: #{bot_count} (#{(bot_count.to_f / event_count * 100).round(1)}%)"
        puts " Human events: #{human_count} (#{(human_count.to_f / event_count * 100).round(1)}%)"
      end
      puts
      puts "📂 Database location: #{duckdb_path}"
      # Rounded to two decimals, matching the size line printed by duckdb:stats.
      puts "📊 Database size: #{(File.size(duckdb_path) / 1024.0 / 1024.0).round(2)}MB"
      puts
    rescue => e
      puts "❌ Error rebuilding DuckDB: #{e.message}"
      puts e.backtrace.first(5).join("\n")
      exit 1
    end
  end

  desc "Show DuckDB statistics"
  task stats: :environment do
    duckdb_path = Rails.root.join("storage", "analytics.duckdb")

    unless File.exist?(duckdb_path)
      puts "❌ DuckDB database not found at: #{duckdb_path}"
      exit 1
    end

    puts "=" * 80
    puts "DuckDB Statistics"
    puts "=" * 80
    puts

    total = AnalyticsDuckdbService.instance.event_count

    AnalyticsDuckdbService.instance.with_connection do |conn|
      # Bot breakdown
      result = conn.query(<<~SQL)
        SELECT
          is_bot,
          COUNT(*) as event_count,
          COUNT(DISTINCT ip_address) as unique_ips
        FROM events
        GROUP BY is_bot
      SQL

      puts "📊 Bot Traffic Breakdown:"
      result.each do |row|
        type = row[0] ? "🤖 Bots" : "👤 Humans"
        count = row[1]
        ips = row[2]
        # `total` comes from a separate query; guard against a zero divisor.
        percentage = total > 0 ? (count.to_f / total * 100).round(1) : 0.0
        puts " #{type}: #{count} events (#{percentage}%) from #{ips} unique IPs"
      end
      puts

      # Date range
      range_result = conn.query("SELECT MIN(timestamp), MAX(timestamp) FROM events")
      min_ts, max_ts = range_result.first
      puts "📅 Date Range:"
      puts " Oldest event: #{min_ts}"
      puts " Newest event: #{max_ts}"
      puts

      # Database info
      puts "💾 Database Info:"
      puts " Location: #{duckdb_path}"
      puts " Size: #{(File.size(duckdb_path) / 1024.0 / 1024.0).round(2)}MB"
      puts " Total events: #{total}"
      puts
    end
  end

  desc "Sync new events from PostgreSQL to DuckDB"
  task sync: :environment do
    puts "🔄 Syncing events from PostgreSQL to DuckDB..."
    start_time = Time.current

    begin
      SyncEventsToDuckdbJob.perform_now
      duration = Time.current - start_time

      puts "✅ Sync complete in #{duration.round(2)}s"
    rescue => e
      puts "❌ Error syncing: #{e.message}"
      exit 1
    end
  end
end
|
||||||
65
test/controllers/rules_controller_test.rb
Normal file
65
test/controllers/rules_controller_test.rb
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
require "test_helper"
|
||||||
|
|
||||||
|
# Integration tests that create rules by POSTing to rules_path as a signed-in user.
class RulesControllerTest < ActionDispatch::IntegrationTest
  setup do
    @user = users(:one)
    sign_in_as(@user)
  end

  test "should create network rule with add_header action" do
    assert_difference('Rule.count') do
      post rules_path, params: {
        rule: {
          waf_rule_type: "network",
          waf_action: "add_header",
          network_range_id: "",
          conditions: "{}",
          metadata: "{}",
          source: "manual",
          expires_at: "",
          enabled: "1"
        },
        new_cidr: "52.167.145.0/24",
        path_pattern: "",
        match_type: "exact",
        header_name: "X-Bot-Agent",
        header_value: "Blah"
      }
    end

    rule = Rule.last
    assert_equal "network", rule.waf_rule_type
    assert_equal "add_header", rule.waf_action, "waf_action should be 'add_header' but was #{rule.waf_action.inspect}"
    assert_equal "X-Bot-Agent", rule.metadata["header_name"]
    assert_equal "Blah", rule.metadata["header_value"]
    assert_not_nil rule.network_range
    # Only the first three octets of the submitted 52.167.145.0/24 CIDR are
    # checked, so the assertion does not depend on how the network is rendered.
    # Parentheses avoid Ruby's "ambiguous first argument" warning for a regexp literal.
    assert_match(/52\.167\.145\./, rule.network_range.network.to_s)

    # Verify metadata JSON doesn't have duplicate keys
    metadata_json = rule.metadata.to_json
    refute_includes metadata_json, '"header_name":"X-Bot-Agent","header_value":"Blah","reason":"{}","header_name"',
                    "Metadata should not have duplicate keys"
  end

  test "should create rule with waf_action properly set from string parameter" do
    assert_difference('Rule.count') do
      post rules_path, params: {
        rule: {
          waf_rule_type: "network",
          waf_action: "deny", # Test with different action
          network_range_id: "",
          conditions: "{}",
          metadata: '{"reason": "test"}',
          source: "manual",
          enabled: "1"
        },
        new_cidr: "10.0.0.1/32"
      }
    end

    rule = Rule.last
    assert_equal "deny", rule.waf_action, "waf_action should be 'deny'"
    assert_equal "network", rule.waf_rule_type
  end
end
|
||||||
66
test/fixtures/files/ipapi_91_84_96_0.json
vendored
Normal file
66
test/fixtures/files/ipapi_91_84_96_0.json
vendored
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
{
|
||||||
|
"ip": "91.84.96.0",
|
||||||
|
"rir": "RIPE",
|
||||||
|
"is_bogon": false,
|
||||||
|
"is_mobile": false,
|
||||||
|
"is_satellite": false,
|
||||||
|
"is_crawler": false,
|
||||||
|
"is_datacenter": true,
|
||||||
|
"is_tor": false,
|
||||||
|
"is_proxy": false,
|
||||||
|
"is_vpn": false,
|
||||||
|
"is_abuser": false,
|
||||||
|
"datacenter": {
|
||||||
|
"datacenter": "SERVERS TECH FZCO",
|
||||||
|
"domain": "vdsina.com",
|
||||||
|
"network": "91.84.96.0 - 91.84.127.255"
|
||||||
|
},
|
||||||
|
"company": {
|
||||||
|
"name": "SERVERS TECH FZCO",
|
||||||
|
"abuser_score": "0.0162 (Elevated)",
|
||||||
|
"domain": "vdsina.com",
|
||||||
|
"type": "hosting",
|
||||||
|
"network": "91.84.96.0 - 91.84.127.255",
|
||||||
|
"whois": "https://api.ipapi.is/?whois=91.84.96.0"
|
||||||
|
},
|
||||||
|
"abuse": {
|
||||||
|
"name": "SERVERS TECH FZCO",
|
||||||
|
"address": "UNITED ARAB EMIRATES, Dubai, 336469, Ifza Business Park DDP, Building 1, office number 36298-001",
|
||||||
|
"email": "abuse@vdsina.com",
|
||||||
|
"phone": "+971525386329"
|
||||||
|
},
|
||||||
|
"asn": {
|
||||||
|
"asn": 216071,
|
||||||
|
"abuser_score": "0.0181 (Elevated)",
|
||||||
|
"route": "91.84.96.0/24",
|
||||||
|
"descr": "VDSINA, AE",
|
||||||
|
"country": "ae",
|
||||||
|
"active": true,
|
||||||
|
"org": "SERVERS TECH FZCO",
|
||||||
|
"domain": "vdsina.com",
|
||||||
|
"abuse": "abuse@vdsina.com",
|
||||||
|
"type": "hosting",
|
||||||
|
"created": "2023-10-30",
|
||||||
|
"updated": "2023-10-30",
|
||||||
|
"rir": "RIPE",
|
||||||
|
"whois": "https://api.ipapi.is/?whois=AS216071"
|
||||||
|
},
|
||||||
|
"location": {
|
||||||
|
"is_eu_member": true,
|
||||||
|
"calling_code": "31",
|
||||||
|
"currency_code": "EUR",
|
||||||
|
"continent": "EU",
|
||||||
|
"country": "The Netherlands",
|
||||||
|
"country_code": "NL",
|
||||||
|
"state": "North Holland",
|
||||||
|
"city": "Amsterdam",
|
||||||
|
"latitude": 52.37403,
|
||||||
|
"longitude": 4.88969,
|
||||||
|
"zip": "1384",
|
||||||
|
"timezone": "Europe/Brussels",
|
||||||
|
"local_time": "2025-11-17T22:21:06+01:00",
|
||||||
|
"local_time_unix": 1763414466,
|
||||||
|
"is_dst": false
|
||||||
|
},
|
||||||
|
"elapsed_ms": 0.5
|
||||||
|
}
|
||||||
4
test/fixtures/users.yml
vendored
4
test/fixtures/users.yml
vendored
@@ -7,3 +7,7 @@ one:
|
|||||||
two:
|
two:
|
||||||
email_address: two@example.com
|
email_address: two@example.com
|
||||||
password_digest: <%= password_digest %>
|
password_digest: <%= password_digest %>
|
||||||
|
|
||||||
|
jason:
|
||||||
|
email_address: jason@example.com
|
||||||
|
password_digest: <%= password_digest %>
|
||||||
|
|||||||
195
test/jobs/cleanup_old_events_job_test.rb
Normal file
195
test/jobs/cleanup_old_events_job_test.rb
Normal file
@@ -0,0 +1,195 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require "test_helper"
|
||||||
|
|
||||||
|
class CleanupOldEventsJobTest < ActiveJob::TestCase
|
||||||
|
setup do
|
||||||
|
# Clear any existing events
|
||||||
|
Event.delete_all
|
||||||
|
# Set default retention to 90 days
|
||||||
|
Setting.set('event_retention_days', '90')
|
||||||
|
end
|
||||||
|
|
||||||
|
test "deletes events older than retention period" do
|
||||||
|
# Create old event (100 days ago - should be deleted)
|
||||||
|
old_event = Event.create!(
|
||||||
|
request_id: "old-request-#{SecureRandom.uuid}",
|
||||||
|
timestamp: 100.days.ago,
|
||||||
|
ip_address: "1.2.3.4",
|
||||||
|
payload: { request: { ip: "1.2.3.4" } }
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create recent event (30 days ago - should be kept)
|
||||||
|
recent_event = Event.create!(
|
||||||
|
request_id: "recent-request-#{SecureRandom.uuid}",
|
||||||
|
timestamp: 30.days.ago,
|
||||||
|
ip_address: "5.6.7.8",
|
||||||
|
payload: { request: { ip: "5.6.7.8" } }
|
||||||
|
)
|
||||||
|
|
||||||
|
count = CleanupOldEventsJob.perform_now
|
||||||
|
|
||||||
|
assert_equal 1, count
|
||||||
|
assert_raises(ActiveRecord::RecordNotFound) { old_event.reload }
|
||||||
|
assert_nothing_raised { recent_event.reload }
|
||||||
|
end
|
||||||
|
|
||||||
|
test "respects custom retention period" do
|
||||||
|
# Set retention to 30 days
|
||||||
|
Setting.set('event_retention_days', '30')
|
||||||
|
|
||||||
|
# Create event that's 40 days old (should be deleted with 30-day retention)
|
||||||
|
old_event = Event.create!(
|
||||||
|
request_id: "old-request-#{SecureRandom.uuid}",
|
||||||
|
timestamp: 40.days.ago,
|
||||||
|
ip_address: "1.2.3.4",
|
||||||
|
payload: { request: { ip: "1.2.3.4" } }
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create event that's 20 days old (should be kept)
|
||||||
|
recent_event = Event.create!(
|
||||||
|
request_id: "recent-request-#{SecureRandom.uuid}",
|
||||||
|
timestamp: 20.days.ago,
|
||||||
|
ip_address: "5.6.7.8",
|
||||||
|
payload: { request: { ip: "5.6.7.8" } }
|
||||||
|
)
|
||||||
|
|
||||||
|
count = CleanupOldEventsJob.perform_now
|
||||||
|
|
||||||
|
assert_equal 1, count
|
||||||
|
assert_raises(ActiveRecord::RecordNotFound) { old_event.reload }
|
||||||
|
assert_nothing_raised { recent_event.reload }
|
||||||
|
end
|
||||||
|
|
||||||
|
test "does not delete when retention is zero" do
|
||||||
|
Setting.set('event_retention_days', '0')
|
||||||
|
|
||||||
|
old_event = Event.create!(
|
||||||
|
request_id: "old-request-#{SecureRandom.uuid}",
|
||||||
|
timestamp: 100.days.ago,
|
||||||
|
ip_address: "1.2.3.4",
|
||||||
|
payload: { request: { ip: "1.2.3.4" } }
|
||||||
|
)
|
||||||
|
|
||||||
|
count = CleanupOldEventsJob.perform_now
|
||||||
|
|
||||||
|
assert_equal 0, count
|
||||||
|
assert_nothing_raised { old_event.reload }
|
||||||
|
end
|
||||||
|
|
||||||
|
test "does not delete when retention is negative" do
|
||||||
|
Setting.set('event_retention_days', '-1')
|
||||||
|
|
||||||
|
old_event = Event.create!(
|
||||||
|
request_id: "old-request-#{SecureRandom.uuid}",
|
||||||
|
timestamp: 100.days.ago,
|
||||||
|
ip_address: "1.2.3.4",
|
||||||
|
payload: { request: { ip: "1.2.3.4" } }
|
||||||
|
)
|
||||||
|
|
||||||
|
count = CleanupOldEventsJob.perform_now
|
||||||
|
|
||||||
|
assert_equal 0, count
|
||||||
|
assert_nothing_raised { old_event.reload }
|
||||||
|
end
|
||||||
|
|
||||||
|
test "returns zero when no old events exist" do
|
||||||
|
# Create only recent events
|
||||||
|
Event.create!(
|
||||||
|
request_id: "recent-request-#{SecureRandom.uuid}",
|
||||||
|
timestamp: 30.days.ago,
|
||||||
|
ip_address: "1.2.3.4",
|
||||||
|
payload: { request: { ip: "1.2.3.4" } }
|
||||||
|
)
|
||||||
|
|
||||||
|
count = CleanupOldEventsJob.perform_now
|
||||||
|
|
||||||
|
assert_equal 0, count
|
||||||
|
end
|
||||||
|
|
||||||
|
test "returns zero when no events exist" do
|
||||||
|
count = CleanupOldEventsJob.perform_now
|
||||||
|
|
||||||
|
assert_equal 0, count
|
||||||
|
end
|
||||||
|
|
||||||
|
test "deletes multiple old events" do
|
||||||
|
# Create 5 old events
|
||||||
|
5.times do |i|
|
||||||
|
Event.create!(
|
||||||
|
request_id: "old-request-#{i}-#{SecureRandom.uuid}",
|
||||||
|
timestamp: 100.days.ago,
|
||||||
|
ip_address: "1.2.3.#{i}",
|
||||||
|
payload: { request: { ip: "1.2.3.#{i}" } }
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
# Create 3 recent events
|
||||||
|
3.times do |i|
|
||||||
|
Event.create!(
|
||||||
|
request_id: "recent-request-#{i}-#{SecureRandom.uuid}",
|
||||||
|
timestamp: 30.days.ago,
|
||||||
|
ip_address: "5.6.7.#{i}",
|
||||||
|
payload: { request: { ip: "5.6.7.#{i}" } }
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
count = CleanupOldEventsJob.perform_now
|
||||||
|
|
||||||
|
assert_equal 5, count
|
||||||
|
assert_equal 3, Event.count
|
||||||
|
end
|
||||||
|
|
||||||
|
test "uses default retention when setting not configured" do
|
||||||
|
# Remove the setting
|
||||||
|
Setting.find_by(key: 'event_retention_days')&.destroy
|
||||||
|
|
||||||
|
# Create event that's 100 days old (should be deleted with default 90-day retention)
|
||||||
|
old_event = Event.create!(
|
||||||
|
request_id: "old-request-#{SecureRandom.uuid}",
|
||||||
|
timestamp: 100.days.ago,
|
||||||
|
ip_address: "1.2.3.4",
|
||||||
|
payload: { request: { ip: "1.2.3.4" } }
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create event that's 80 days old (should be kept with default 90-day retention)
|
||||||
|
recent_event = Event.create!(
|
||||||
|
request_id: "recent-request-#{SecureRandom.uuid}",
|
||||||
|
timestamp: 80.days.ago,
|
||||||
|
ip_address: "5.6.7.8",
|
||||||
|
payload: { request: { ip: "5.6.7.8" } }
|
||||||
|
)
|
||||||
|
|
||||||
|
count = CleanupOldEventsJob.perform_now
|
||||||
|
|
||||||
|
assert_equal 1, count
|
||||||
|
assert_raises(ActiveRecord::RecordNotFound) { old_event.reload }
|
||||||
|
assert_nothing_raised { recent_event.reload }
|
||||||
|
end
|
||||||
|
|
||||||
|
test "handles events at exact cutoff boundary correctly" do
|
||||||
|
Setting.set('event_retention_days', '90')
|
||||||
|
|
||||||
|
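# Create event at the 90-day mark (should be deleted: with a < comparison this presumably relies on the timestamp being taken just before the job computes its own cutoff - confirm against the job)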
|
||||||
|
cutoff_event = Event.create!(
|
||||||
|
request_id: "cutoff-request-#{SecureRandom.uuid}",
|
||||||
|
timestamp: 90.days.ago,
|
||||||
|
ip_address: "1.2.3.4",
|
||||||
|
payload: { request: { ip: "1.2.3.4" } }
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create event just inside cutoff (should be kept)
|
||||||
|
inside_event = Event.create!(
|
||||||
|
request_id: "inside-request-#{SecureRandom.uuid}",
|
||||||
|
timestamp: 89.days.ago,
|
||||||
|
ip_address: "5.6.7.8",
|
||||||
|
payload: { request: { ip: "5.6.7.8" } }
|
||||||
|
)
|
||||||
|
|
||||||
|
count = CleanupOldEventsJob.perform_now
|
||||||
|
|
||||||
|
assert_equal 1, count
|
||||||
|
assert_raises(ActiveRecord::RecordNotFound) { cutoff_event.reload }
|
||||||
|
assert_nothing_raised { inside_event.reload }
|
||||||
|
end
|
||||||
|
end
|
||||||
@@ -354,10 +354,4 @@ class ProcessWafEventJobTest < ActiveJob::TestCase
|
|||||||
assert_equal 100, Event.count
|
assert_equal 100, Event.count
|
||||||
assert processing_time < 5.seconds, "Processing 100 events should take less than 5 seconds"
|
assert processing_time < 5.seconds, "Processing 100 events should take less than 5 seconds"
|
||||||
end
|
end
|
||||||
|
|
||||||
# Integration with Other Jobs
|
|
||||||
test "coordinates with BackfillRecentNetworkIntelligenceJob" do
|
|
||||||
# This would be tested based on how the job enqueues other jobs
|
|
||||||
# Implementation depends on your specific job coordination logic
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
@@ -78,14 +78,17 @@ class EventTest < ActiveSupport::TestCase
|
|||||||
end
|
end
|
||||||
|
|
||||||
test "create_from_waf_payload! properly normalizes waf_action enum" do
|
test "create_from_waf_payload! properly normalizes waf_action enum" do
|
||||||
|
# Updated enum values: deny:0, allow:1, redirect:2, challenge:3, log:4
|
||||||
test_actions = [
|
test_actions = [
|
||||||
["allow", :allow, 0],
|
["deny", :deny, 0],
|
||||||
["pass", :allow, 0],
|
["block", :deny, 0],
|
||||||
["deny", :deny, 1],
|
["allow", :allow, 1],
|
||||||
["block", :deny, 1],
|
["pass", :allow, 1],
|
||||||
["redirect", :redirect, 2],
|
["redirect", :redirect, 2],
|
||||||
["challenge", :challenge, 3],
|
["challenge", :challenge, 3],
|
||||||
["unknown", :allow, 0] # Default fallback
|
["log", :log, 4],
|
||||||
|
["monitor", :log, 4],
|
||||||
|
["unknown", :allow, 1] # Default fallback
|
||||||
]
|
]
|
||||||
|
|
||||||
test_actions.each do |action, expected_enum, expected_int|
|
test_actions.each do |action, expected_enum, expected_int|
|
||||||
@@ -122,20 +125,20 @@ class EventTest < ActiveSupport::TestCase
|
|||||||
test "enum values persist after save and reload" do
|
test "enum values persist after save and reload" do
|
||||||
event = Event.create_from_waf_payload!("test-persist", @sample_payload)
|
event = Event.create_from_waf_payload!("test-persist", @sample_payload)
|
||||||
|
|
||||||
# Verify initial values
|
# Verify initial values (updated enum: deny:0, allow:1)
|
||||||
assert_equal "get", event.request_method
|
assert_equal "get", event.request_method
|
||||||
assert_equal "allow", event.waf_action
|
assert_equal "allow", event.waf_action
|
||||||
assert_equal 0, event.request_method_before_type_cast
|
assert_equal 0, event.request_method_before_type_cast
|
||||||
assert_equal 0, event.waf_action_before_type_cast
|
assert_equal 1, event.waf_action_before_type_cast # allow is now 1
|
||||||
|
|
||||||
# Reload from database
|
# Reload from database
|
||||||
event.reload
|
event.reload
|
||||||
|
|
||||||
# Values should still be correct
|
# Values should still be correct (allow is now 1)
|
||||||
assert_equal "get", event.request_method
|
assert_equal "get", event.request_method
|
||||||
assert_equal "allow", event.waf_action
|
assert_equal "allow", event.waf_action
|
||||||
assert_equal 0, event.request_method_before_type_cast
|
assert_equal 0, event.request_method_before_type_cast
|
||||||
assert_equal 0, event.waf_action_before_type_cast
|
assert_equal 1, event.waf_action_before_type_cast
|
||||||
end
|
end
|
||||||
|
|
||||||
test "enum scopes work correctly" do
|
test "enum scopes work correctly" do
|
||||||
@@ -225,7 +228,7 @@ class EventTest < ActiveSupport::TestCase
|
|||||||
assert_equal "post", event.request_method
|
assert_equal "post", event.request_method
|
||||||
assert_equal "deny", event.waf_action
|
assert_equal "deny", event.waf_action
|
||||||
assert_equal 1, event.request_method_before_type_cast # POST = 1
|
assert_equal 1, event.request_method_before_type_cast # POST = 1
|
||||||
assert_equal 1, event.waf_action_before_type_cast # DENY = 1
|
assert_equal 0, event.waf_action_before_type_cast # DENY = 0
|
||||||
end
|
end
|
||||||
|
|
||||||
test "payload extraction methods work correctly" do
|
test "payload extraction methods work correctly" do
|
||||||
@@ -260,7 +263,7 @@ class EventTest < ActiveSupport::TestCase
|
|||||||
# Test boolean methods
|
# Test boolean methods
|
||||||
assert event.allowed?
|
assert event.allowed?
|
||||||
assert_not event.blocked?
|
assert_not event.blocked?
|
||||||
assert_not event.rate_limited?
|
assert_not event.logged? # Changed from rate_limited? to logged?
|
||||||
assert_not event.challenged?
|
assert_not event.challenged?
|
||||||
assert_not event.rule_matched?
|
assert_not event.rule_matched?
|
||||||
|
|
||||||
|
|||||||
@@ -211,16 +211,51 @@ class NetworkRangeTest < ActiveSupport::TestCase
|
|||||||
assert_equal @ipv4_range, children.first
|
assert_equal @ipv4_range, children.first
|
||||||
end
|
end
|
||||||
|
|
||||||
test "sibling_ranges finds same-level networks" do
|
test "child_ranges works with Apple network hierarchy - 17.240.0.0/14" do
|
||||||
# Create sibling networks
|
# This test demonstrates the current bug in child_ranges method
|
||||||
sibling1 = NetworkRange.create!(network: "192.168.0.0/24")
|
# Expected: 17.240.0.0/14 should report both parent networks and all four child networks created below
|
||||||
@ipv4_range.save! # 192.168.1.0/24
|
|
||||||
sibling2 = NetworkRange.create!(network: "192.168.2.0/24")
|
|
||||||
|
|
||||||
siblings = @ipv4_range.sibling_ranges
|
# Create the target network
|
||||||
assert_includes siblings, sibling1
|
target_network = NetworkRange.create!(network: "17.240.0.0/14", source: "manual")
|
||||||
assert_includes siblings, sibling2
|
|
||||||
assert_not_includes siblings, @ipv4_range
|
# Create parent networks
|
||||||
|
parent1 = NetworkRange.create!(network: "17.240.0.0/13", source: "manual") # Should contain 17.240.0.0/14
|
||||||
|
parent2 = NetworkRange.create!(network: "17.128.0.0/9", source: "manual") # Should also contain 17.240.0.0/14
|
||||||
|
|
||||||
|
# Create some child networks (more specific networks contained by 17.240.0.0/14)
|
||||||
|
child1 = NetworkRange.create!(network: "17.240.0.0/15", source: "manual") # First half of /14
|
||||||
|
child2 = NetworkRange.create!(network: "17.242.0.0/15", source: "manual") # Second half of /14
|
||||||
|
child3 = NetworkRange.create!(network: "17.240.0.0/16", source: "manual") # More specific
|
||||||
|
child4 = NetworkRange.create!(network: "17.241.0.0/16", source: "manual") # More specific
|
||||||
|
|
||||||
|
# Test parent_ranges works correctly
|
||||||
|
parents = target_network.parent_ranges
|
||||||
|
assert_includes parents, parent1, "17.240.0.0/13 should be a parent of 17.240.0.0/14"
|
||||||
|
assert_includes parents, parent2, "17.128.0.0/9 should be a parent of 17.240.0.0/14"
|
||||||
|
|
||||||
|
# Test child_ranges - this is currently failing due to the bug
|
||||||
|
children = target_network.child_ranges
|
||||||
|
assert_includes children, child1, "17.240.0.0/15 should be a child of 17.240.0.0/14"
|
||||||
|
assert_includes children, child2, "17.242.0.0/15 should be a child of 17.240.0.0/14"
|
||||||
|
assert_includes children, child3, "17.240.0.0/16 should be a child of 17.240.0.0/14"
|
||||||
|
assert_includes children, child4, "17.241.0.0/16 should be a child of 17.240.0.0/14"
|
||||||
|
assert_not_includes children, parent1, "Parent networks should not be in child_ranges"
|
||||||
|
assert_not_includes children, parent2, "Parent networks should not be in child_ranges"
|
||||||
|
assert_not_includes children, target_network, "Self should not be in child_ranges"
|
||||||
|
|
||||||
|
# Test that parent can find child in its child_ranges
|
||||||
|
parent1_children = parent1.child_ranges
|
||||||
|
assert_includes parent1_children, target_network, "17.240.0.0/14 should be in child_ranges of 17.240.0.0/13"
|
||||||
|
|
||||||
|
parent2_children = parent2.child_ranges
|
||||||
|
assert_includes parent2_children, target_network, "17.240.0.0/14 should be in child_ranges of 17.128.0.0/9"
|
||||||
|
|
||||||
|
# Test bidirectional consistency
|
||||||
|
assert target_network.parent_ranges.include?(parent1), "Parent should list child"
|
||||||
|
assert parent1.child_ranges.include?(target_network), "Child should list parent"
|
||||||
|
|
||||||
|
assert target_network.parent_ranges.include?(parent2), "Parent should list child"
|
||||||
|
assert parent2.child_ranges.include?(target_network), "Child should list parent"
|
||||||
end
|
end
|
||||||
|
|
||||||
# Intelligence and Inheritance
|
# Intelligence and Inheritance
|
||||||
|
|||||||
233
test/models/rule_path_pattern_test.rb
Normal file
233
test/models/rule_path_pattern_test.rb
Normal file
@@ -0,0 +1,233 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require "test_helper"
|
||||||
|
|
||||||
|
class RulePathPatternTest < ActiveSupport::TestCase
|
||||||
|
setup do
|
||||||
|
@user = User.create!(email_address: "test@example.com", password: "password123")
|
||||||
|
end
|
||||||
|
|
||||||
|
test "create_path_pattern_rule creates valid rule" do
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/admin/users",
|
||||||
|
match_type: "exact",
|
||||||
|
action: "deny",
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
assert rule.persisted?, "Rule should be persisted"
|
||||||
|
assert_equal "path_pattern", rule.waf_rule_type
|
||||||
|
assert_equal "deny", rule.waf_action
|
||||||
|
assert_equal "exact", rule.path_match_type
|
||||||
|
assert_equal 2, rule.path_segment_ids.length
|
||||||
|
end
|
||||||
|
|
||||||
|
test "create_path_pattern_rule auto-creates PathSegments" do
|
||||||
|
initial_count = PathSegment.count
|
||||||
|
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/new/path/here",
|
||||||
|
match_type: "prefix",
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
assert_equal initial_count + 3, PathSegment.count, "Should create 3 new segments"
|
||||||
|
assert_equal 3, rule.path_segment_ids.length
|
||||||
|
end
|
||||||
|
|
||||||
|
test "create_path_pattern_rule normalizes to lowercase" do
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/Admin/Users",
|
||||||
|
match_type: "exact",
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
segments = rule.path_segments_text
|
||||||
|
assert_equal ["admin", "users"], segments, "Segments should be lowercase"
|
||||||
|
end
|
||||||
|
|
||||||
|
test "create_path_pattern_rule reuses existing PathSegments" do
|
||||||
|
# Create segment first
|
||||||
|
PathSegment.find_or_create_segment("admin")
|
||||||
|
initial_count = PathSegment.count
|
||||||
|
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/admin",
|
||||||
|
match_type: "exact",
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
assert_equal initial_count, PathSegment.count, "Should not create duplicate segment"
|
||||||
|
assert_equal 1, rule.path_segment_ids.length
|
||||||
|
end
|
||||||
|
|
||||||
|
test "create_path_pattern_rule validates match_type" do
|
||||||
|
assert_raises(ArgumentError, "Should raise for invalid match_type") do
|
||||||
|
Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/admin",
|
||||||
|
match_type: "invalid",
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
test "create_path_pattern_rule validates pattern not empty" do
|
||||||
|
assert_raises(ArgumentError, "Should raise for empty pattern") do
|
||||||
|
Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/",
|
||||||
|
match_type: "exact",
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
test "validation requires segment_ids for path_pattern rules" do
|
||||||
|
rule = Rule.new(
|
||||||
|
waf_rule_type: "path_pattern",
|
||||||
|
waf_action: "deny",
|
||||||
|
conditions: { match_type: "exact" }, # Missing segment_ids
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
refute rule.valid?, "Rule should be invalid without segment_ids"
|
||||||
|
assert_includes rule.errors[:conditions], "must include 'segment_ids' array for path_pattern rules"
|
||||||
|
end
|
||||||
|
|
||||||
|
test "validation requires match_type for path_pattern rules" do
|
||||||
|
admin_seg = PathSegment.find_or_create_segment("admin")
|
||||||
|
|
||||||
|
rule = Rule.new(
|
||||||
|
waf_rule_type: "path_pattern",
|
||||||
|
waf_action: "deny",
|
||||||
|
conditions: { segment_ids: [admin_seg.id] }, # Missing match_type
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
refute rule.valid?, "Rule should be invalid without match_type"
|
||||||
|
assert_includes rule.errors[:conditions], "match_type must be one of: exact, prefix, suffix, contains"
|
||||||
|
end
|
||||||
|
|
||||||
|
test "validation checks match_type is valid" do
|
||||||
|
admin_seg = PathSegment.find_or_create_segment("admin")
|
||||||
|
|
||||||
|
rule = Rule.new(
|
||||||
|
waf_rule_type: "path_pattern",
|
||||||
|
waf_action: "deny",
|
||||||
|
conditions: { segment_ids: [admin_seg.id], match_type: "invalid" },
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
refute rule.valid?, "Rule should be invalid with invalid match_type"
|
||||||
|
assert_includes rule.errors[:conditions], "match_type must be one of: exact, prefix, suffix, contains"
|
||||||
|
end
|
||||||
|
|
||||||
|
test "validation checks segment IDs exist" do
|
||||||
|
rule = Rule.new(
|
||||||
|
waf_rule_type: "path_pattern",
|
||||||
|
waf_action: "deny",
|
||||||
|
conditions: { segment_ids: [99999], match_type: "exact" }, # Non-existent ID
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
refute rule.valid?, "Rule should be invalid with non-existent segment IDs"
|
||||||
|
assert_match(/non-existent path segment IDs/, rule.errors[:conditions].first)
|
||||||
|
end
|
||||||
|
|
||||||
|
test "path_pattern_display returns human-readable path" do
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/admin/users",
|
||||||
|
match_type: "exact",
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
assert_equal "/admin/users", rule.path_pattern_display
|
||||||
|
end
|
||||||
|
|
||||||
|
test "path_segments_text returns segment text array" do
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/api/v1/users",
|
||||||
|
match_type: "prefix",
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
assert_equal ["api", "v1", "users"], rule.path_segments_text
|
||||||
|
end
|
||||||
|
|
||||||
|
test "to_agent_format includes segment_ids and match_type for path rules" do
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/admin",
|
||||||
|
match_type: "prefix",
|
||||||
|
action: "deny",
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
agent_format = rule.to_agent_format
|
||||||
|
|
||||||
|
assert_equal "path_pattern", agent_format[:waf_rule_type]
|
||||||
|
assert_equal "deny", agent_format[:waf_action]
|
||||||
|
assert agent_format[:conditions].key?(:segment_ids), "Should include segment_ids"
|
||||||
|
assert_equal "prefix", agent_format[:conditions][:match_type]
|
||||||
|
assert_kind_of Array, agent_format[:conditions][:segment_ids]
|
||||||
|
end
|
||||||
|
|
||||||
|
test "supports all four match types" do
|
||||||
|
%w[exact prefix suffix contains].each do |match_type|
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/admin",
|
||||||
|
match_type: match_type,
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
assert rule.persisted?, "Should create rule with #{match_type} match type"
|
||||||
|
assert_equal match_type, rule.path_match_type
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
test "supports all action types" do
|
||||||
|
%w[allow deny challenge].each do |action|
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/admin",
|
||||||
|
match_type: "exact",
|
||||||
|
action: action,
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
assert rule.persisted?, "Should create rule with #{action} action"
|
||||||
|
assert_equal action, rule.waf_action
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
test "supports redirect action with metadata" do
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/admin",
|
||||||
|
match_type: "exact",
|
||||||
|
action: "redirect",
|
||||||
|
user: @user,
|
||||||
|
metadata: { redirect_url: "https://example.com" }
|
||||||
|
)
|
||||||
|
|
||||||
|
assert rule.persisted?, "Should create rule with redirect action"
|
||||||
|
assert_equal "redirect", rule.waf_action
|
||||||
|
end
|
||||||
|
|
||||||
|
test "stores metadata with human-readable segments" do
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/admin/users",
|
||||||
|
match_type: "exact",
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
assert_equal ["admin", "users"], rule.metadata["segments"]
|
||||||
|
assert_equal "/admin/users", rule.metadata["pattern_display"]
|
||||||
|
end
|
||||||
|
|
||||||
|
test "stores original pattern in conditions" do
|
||||||
|
rule = Rule.create_path_pattern_rule(
|
||||||
|
pattern: "/Admin/Users", # Mixed case
|
||||||
|
match_type: "exact",
|
||||||
|
user: @user
|
||||||
|
)
|
||||||
|
|
||||||
|
assert_equal "/Admin/Users", rule.conditions["original_pattern"]
|
||||||
|
end
|
||||||
|
end
|
||||||
@@ -36,7 +36,7 @@ class RuleTest < ActiveSupport::TestCase
|
|||||||
test "should create valid rate_limit rule" do
|
test "should create valid rate_limit rule" do
|
||||||
rule = Rule.new(
|
rule = Rule.new(
|
||||||
waf_rule_type: "rate_limit",
|
waf_rule_type: "rate_limit",
|
||||||
waf_action: "rate_limit",
|
waf_action: "deny", # Rate limit rules use deny action when triggered
|
||||||
conditions: { cidr: "0.0.0.0/0", scope: "global" },
|
conditions: { cidr: "0.0.0.0/0", scope: "global" },
|
||||||
metadata: { limit: 100, window: 60 },
|
metadata: { limit: 100, window: 60 },
|
||||||
source: "manual",
|
source: "manual",
|
||||||
@@ -83,7 +83,7 @@ class RuleTest < ActiveSupport::TestCase
|
|||||||
test "should validate rate_limit has limit and window in metadata" do
|
test "should validate rate_limit has limit and window in metadata" do
|
||||||
rule = Rule.new(
|
rule = Rule.new(
|
||||||
waf_rule_type: "rate_limit",
|
waf_rule_type: "rate_limit",
|
||||||
waf_action: "rate_limit",
|
waf_action: "deny", # Rate limit rules use deny action when triggered
|
||||||
conditions: { cidr: "0.0.0.0/0", scope: "global" },
|
conditions: { cidr: "0.0.0.0/0", scope: "global" },
|
||||||
metadata: { limit: 100 }, # Missing window
|
metadata: { limit: 100 }, # Missing window
|
||||||
user: users(:one)
|
user: users(:one)
|
||||||
@@ -202,4 +202,95 @@ class RuleTest < ActiveSupport::TestCase
|
|||||||
assert_equal 8, format[:priority]
|
assert_equal 8, format[:priority]
|
||||||
assert_equal true, format[:enabled]
|
assert_equal true, format[:enabled]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Tag functionality tests
|
||||||
|
test "should store and retrieve tags in metadata" do
|
||||||
|
network_range = NetworkRange.create!(cidr: "10.0.0.0/8")
|
||||||
|
rule = Rule.create!(
|
||||||
|
waf_rule_type: "network",
|
||||||
|
waf_action: "allow",
|
||||||
|
network_range: network_range,
|
||||||
|
metadata: { tags: ["bot:googlebot", "trusted"] },
|
||||||
|
user: users(:one)
|
||||||
|
)
|
||||||
|
|
||||||
|
assert_equal ["bot:googlebot", "trusted"], rule.tags
|
||||||
|
end
|
||||||
|
|
||||||
|
test "should add tag to rule" do
|
||||||
|
network_range = NetworkRange.create!(cidr: "10.0.0.0/8")
|
||||||
|
rule = Rule.create!(
|
||||||
|
waf_rule_type: "network",
|
||||||
|
waf_action: "allow",
|
||||||
|
network_range: network_range,
|
||||||
|
user: users(:one)
|
||||||
|
)
|
||||||
|
|
||||||
|
rule.add_tag("bot:googlebot")
|
||||||
|
rule.save!
|
||||||
|
|
||||||
|
assert_includes rule.tags, "bot:googlebot"
|
||||||
|
end
|
||||||
|
|
||||||
|
test "should remove tag from rule" do
|
||||||
|
network_range = NetworkRange.create!(cidr: "10.0.0.0/8")
|
||||||
|
rule = Rule.create!(
|
||||||
|
waf_rule_type: "network",
|
||||||
|
waf_action: "allow",
|
||||||
|
network_range: network_range,
|
||||||
|
metadata: { tags: ["bot:googlebot", "trusted"] },
|
||||||
|
user: users(:one)
|
||||||
|
)
|
||||||
|
|
||||||
|
rule.remove_tag("trusted")
|
||||||
|
rule.save!
|
||||||
|
|
||||||
|
assert_not_includes rule.tags, "trusted"
|
||||||
|
assert_includes rule.tags, "bot:googlebot"
|
||||||
|
end
|
||||||
|
|
||||||
|
test "should check if rule has tag" do
|
||||||
|
network_range = NetworkRange.create!(cidr: "10.0.0.0/8")
|
||||||
|
rule = Rule.create!(
|
||||||
|
waf_rule_type: "network",
|
||||||
|
waf_action: "allow",
|
||||||
|
network_range: network_range,
|
||||||
|
metadata: { tags: ["bot:googlebot"] },
|
||||||
|
user: users(:one)
|
||||||
|
)
|
||||||
|
|
||||||
|
assert rule.has_tag?("bot:googlebot")
|
||||||
|
assert_not rule.has_tag?("bot:bingbot")
|
||||||
|
end
|
||||||
|
|
||||||
|
test "should store headers in metadata" do
|
||||||
|
network_range = NetworkRange.create!(cidr: "10.0.0.0/8")
|
||||||
|
rule = Rule.create!(
|
||||||
|
waf_rule_type: "network",
|
||||||
|
waf_action: "allow",
|
||||||
|
network_range: network_range,
|
||||||
|
metadata: {
|
||||||
|
tags: ["bot:googlebot"],
|
||||||
|
headers: { "X-Bot-Agent" => "googlebot" }
|
||||||
|
},
|
||||||
|
user: users(:one)
|
||||||
|
)
|
||||||
|
|
||||||
|
assert_equal({ "X-Bot-Agent" => "googlebot" }, rule.headers)
|
||||||
|
end
|
||||||
|
|
||||||
|
test "should set tags via assignment" do
|
||||||
|
network_range = NetworkRange.create!(cidr: "10.0.0.0/8")
|
||||||
|
rule = Rule.create!(
|
||||||
|
waf_rule_type: "network",
|
||||||
|
waf_action: "allow",
|
||||||
|
network_range: network_range,
|
||||||
|
user: users(:one)
|
||||||
|
)
|
||||||
|
|
||||||
|
rule.tags = ["bot:bingbot", "network:microsoft"]
|
||||||
|
rule.save!
|
||||||
|
|
||||||
|
assert_equal ["bot:bingbot", "network:microsoft"], rule.tags
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
134
test/services/ipapi_test.rb
Normal file
134
test/services/ipapi_test.rb
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
require "test_helper"
|
||||||
|
|
||||||
|
class IpapiTest < ActiveSupport::TestCase
|
||||||
|
def setup
|
||||||
|
@ipapi_data = JSON.parse(
|
||||||
|
File.read(Rails.root.join("test/fixtures/files/ipapi_91_84_96_0.json"))
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
test "parse_company_network_range extracts and converts IP range to CIDR" do
|
||||||
|
cidr = Ipapi.parse_company_network_range(@ipapi_data)
|
||||||
|
|
||||||
|
assert_equal "91.84.96.0/19", cidr
|
||||||
|
end
|
||||||
|
|
||||||
|
test "parse_company_network_range handles already formatted CIDR" do
|
||||||
|
data = { "company" => { "network" => "1.2.3.0/24" } }
|
||||||
|
cidr = Ipapi.parse_company_network_range(data)
|
||||||
|
|
||||||
|
assert_equal "1.2.3.0/24", cidr
|
||||||
|
end
|
||||||
|
|
||||||
|
test "parse_company_network_range returns nil for invalid range" do
|
||||||
|
data = { "company" => { "network" => "invalid" } }
|
||||||
|
cidr = Ipapi.parse_company_network_range(data)
|
||||||
|
|
||||||
|
assert_nil cidr
|
||||||
|
end
|
||||||
|
|
||||||
|
test "parse_company_network_range returns nil when no network data present" do
|
||||||
|
data = { "company" => {} }
|
||||||
|
cidr = Ipapi.parse_company_network_range(data)
|
||||||
|
|
||||||
|
assert_nil cidr
|
||||||
|
end
|
||||||
|
|
||||||
|
test "parse_company_network_range falls back to datacenter.network" do
|
||||||
|
data = { "datacenter" => { "network" => "1.2.3.0 - 1.2.3.255" } }
|
||||||
|
cidr = Ipapi.parse_company_network_range(data)
|
||||||
|
|
||||||
|
assert_equal "1.2.3.0/24", cidr
|
||||||
|
end
|
||||||
|
|
||||||
|
test "populate_network_attributes sets all network attributes" do
|
||||||
|
network_range = NetworkRange.new(network: "91.84.96.0/24")
|
||||||
|
Ipapi.populate_network_attributes(network_range, @ipapi_data)
|
||||||
|
|
||||||
|
assert_equal 216071, network_range.asn
|
||||||
|
assert_equal "SERVERS TECH FZCO", network_range.asn_org
|
||||||
|
assert_equal "SERVERS TECH FZCO", network_range.company
|
||||||
|
assert_equal "NL", network_range.country
|
||||||
|
assert network_range.is_datacenter
|
||||||
|
refute network_range.is_vpn
|
||||||
|
refute network_range.is_proxy
|
||||||
|
end
|
||||||
|
|
||||||
|
test "process_ipapi_data creates both company and BGP route networks" do
|
||||||
|
# Use a different tracking network so BGP route gets created
|
||||||
|
tracking_network = NetworkRange.create!(
|
||||||
|
network: "91.84.97.0/24",
|
||||||
|
source: "auto_generated"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert_difference "NetworkRange.count", 2 do
|
||||||
|
result = Ipapi.process_ipapi_data(@ipapi_data, tracking_network)
|
||||||
|
|
||||||
|
assert_equal 2, result[:networks].length
|
||||||
|
assert_equal "91.84.96.0/19", result[:broadest_cidr]
|
||||||
|
end
|
||||||
|
|
||||||
|
# Verify company network was created
|
||||||
|
company_network = NetworkRange.find_by(network: "91.84.96.0/19")
|
||||||
|
assert_not_nil company_network
|
||||||
|
assert_equal "api_imported", company_network.source
|
||||||
|
assert_equal "SERVERS TECH FZCO", company_network.company
|
||||||
|
assert company_network.is_datacenter
|
||||||
|
|
||||||
|
# Verify BGP route network was created
|
||||||
|
bgp_network = NetworkRange.find_by(network: "91.84.96.0/24")
|
||||||
|
assert_not_nil bgp_network
|
||||||
|
assert_equal "SERVERS TECH FZCO", bgp_network.company
|
||||||
|
end
|
||||||
|
|
||||||
|
test "process_ipapi_data handles missing company network gracefully" do
|
||||||
|
# Create data without company network range
|
||||||
|
data = @ipapi_data.deep_dup
|
||||||
|
data["company"].delete("network")
|
||||||
|
data["datacenter"].delete("network")
|
||||||
|
|
||||||
|
tracking_network = NetworkRange.create!(
|
||||||
|
network: "91.84.96.0/24",
|
||||||
|
source: "auto_generated"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Should only create the BGP route network (which matches tracking, so 0 new)
|
||||||
|
assert_no_difference "NetworkRange.count" do
|
||||||
|
result = Ipapi.process_ipapi_data(data, tracking_network)
|
||||||
|
|
||||||
|
assert_equal 0, result[:networks].length
|
||||||
|
assert_equal "91.84.96.0/24", result[:broadest_cidr]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
test "process_ipapi_data updates existing networks instead of creating duplicates" do
|
||||||
|
# Pre-create both networks
|
||||||
|
company_network = NetworkRange.create!(
|
||||||
|
network: "91.84.96.0/19",
|
||||||
|
source: "manual",
|
||||||
|
company: "Old Company"
|
||||||
|
)
|
||||||
|
|
||||||
|
bgp_network = NetworkRange.create!(
|
||||||
|
network: "91.84.96.0/24",
|
||||||
|
source: "manual"
|
||||||
|
)
|
||||||
|
|
||||||
|
tracking_network = NetworkRange.create!(
|
||||||
|
network: "91.84.97.0/24",
|
||||||
|
source: "auto_generated"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Should not create new networks, just update existing ones
|
||||||
|
assert_no_difference "NetworkRange.count" do
|
||||||
|
result = Ipapi.process_ipapi_data(@ipapi_data, tracking_network)
|
||||||
|
|
||||||
|
assert_equal 2, result[:networks].length
|
||||||
|
end
|
||||||
|
|
||||||
|
# Verify updates
|
||||||
|
company_network.reload
|
||||||
|
assert_equal "SERVERS TECH FZCO", company_network.company
|
||||||
|
assert company_network.network_data.key?("ipapi")
|
||||||
|
end
|
||||||
|
end
|
||||||
216
test/services/path_rule_matcher_test.rb
Normal file
216
test/services/path_rule_matcher_test.rb
Normal file
@@ -0,0 +1,216 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require "test_helper"
|
||||||
|
|
||||||
|
# Exercises PathRuleMatcher against rules built with
# Rule.create_path_pattern_rule: the exact, prefix, suffix and contains match
# types, plus the matching_rules and evaluate entry points.
class PathRuleMatcherTest < ActiveSupport::TestCase
  setup do
    @user = User.create!(email_address: "test@example.com", password: "password123")

    # Path segments shared by most tests, created in this order.
    segments = %w[admin wp-login.php api v1 users dashboard].map do |text|
      PathSegment.find_or_create_segment(text)
    end
    @admin_segment, @wp_login_segment, @api_segment, @v1_segment, @users_segment, @dashboard_segment = segments
  end

  test "exact match - matches exact path only" do
    rule = path_rule("/wp-login.php", "exact")

    # /wp-login.php
    exact_event = event_for(@wp_login_segment)
    assert PathRuleMatcher.matches?(rule, exact_event), "Should match exact path"

    # /admin/wp-login.php (extra segment)
    longer_event = event_for(@admin_segment, @wp_login_segment)
    refute PathRuleMatcher.matches?(rule, longer_event), "Should not match path with extra segments"
  end

  test "prefix match - matches paths starting with pattern" do
    rule = path_rule("/admin", "prefix")

    # /admin
    bare = event_for(@admin_segment)
    assert PathRuleMatcher.matches?(rule, bare), "Should match exact prefix"

    # /admin/dashboard
    one_deeper = event_for(@admin_segment, @dashboard_segment)
    assert PathRuleMatcher.matches?(rule, one_deeper), "Should match prefix with additional segments"

    # /admin/users/123
    two_deeper = event_for(@admin_segment, @users_segment, create_segment("123"))
    assert PathRuleMatcher.matches?(rule, two_deeper), "Should match prefix with multiple additional segments"

    # /api/admin must NOT match: admin is not the leading segment
    misplaced = event_for(@api_segment, @admin_segment)
    refute PathRuleMatcher.matches?(rule, misplaced), "Should not match when pattern not at start"
  end

  test "suffix match - matches paths ending with pattern" do
    rule = path_rule("/wp-login.php", "suffix")

    # /wp-login.php
    bare = event_for(@wp_login_segment)
    assert PathRuleMatcher.matches?(rule, bare), "Should match exact suffix"

    # /admin/wp-login.php
    one_before = event_for(@admin_segment, @wp_login_segment)
    assert PathRuleMatcher.matches?(rule, one_before), "Should match suffix with prefix segments"

    # /backup/admin/wp-login.php
    backup = create_segment("backup")
    two_before = event_for(backup, @admin_segment, @wp_login_segment)
    assert PathRuleMatcher.matches?(rule, two_before), "Should match suffix with multiple prefix segments"

    # /wp-login.php/test must NOT match: an extra segment follows the pattern
    trailing = create_segment("test")
    misplaced = event_for(@wp_login_segment, trailing)
    refute PathRuleMatcher.matches?(rule, misplaced), "Should not match when pattern not at end"
  end

  test "contains match - matches paths containing pattern" do
    rule = path_rule("/admin", "contains")

    # /admin
    bare = event_for(@admin_segment)
    assert PathRuleMatcher.matches?(rule, bare), "Should match exact contains"

    # /api/admin/users
    in_middle = event_for(@api_segment, @admin_segment, @users_segment)
    assert PathRuleMatcher.matches?(rule, in_middle), "Should match contains in middle"

    # /super/secret/admin/panel
    surrounding = %w[super secret panel].map { |text| create_segment(text) }
    buried = event_for(surrounding[0], surrounding[1], @admin_segment, surrounding[2])
    assert PathRuleMatcher.matches?(rule, buried), "Should match contains with prefix and suffix"

    # /administrator must NOT match: it is a different segment
    lookalike = event_for(create_segment("administrator"))
    refute PathRuleMatcher.matches?(rule, lookalike), "Should not match different segment"
  end

  test "contains match with multi-segment pattern" do
    rule = path_rule("/api/admin", "contains")

    # /api/admin
    bare = event_for(@api_segment, @admin_segment)
    assert PathRuleMatcher.matches?(rule, bare), "Should match exact contains"

    # /v1/api/admin/users
    in_middle = event_for(@v1_segment, @api_segment, @admin_segment, @users_segment)
    assert PathRuleMatcher.matches?(rule, in_middle), "Should match consecutive segments in middle"

    # /api/v1/admin must NOT match: the pattern segments are not consecutive
    split_up = event_for(@api_segment, @v1_segment, @admin_segment)
    refute PathRuleMatcher.matches?(rule, split_up), "Should not match non-consecutive segments"
  end

  test "case insensitive matching through PathSegment normalization" do
    # Mixed-case pattern; per the original author's note,
    # PathSegment.find_or_create_segment normalizes to lowercase.
    rule = path_rule("/Admin/Users", "exact")

    # Event with lowercase path should match
    lowercase_event = event_for(@admin_segment, @users_segment)
    assert PathRuleMatcher.matches?(rule, lowercase_event), "Should match case-insensitively"
  end

  test "matching_rules returns all matching rules" do
    admin_prefix = path_rule("/admin", "prefix")
    admin_users_exact = path_rule("/admin/users", "exact", action: "allow")
    api_prefix = path_rule("/api", "prefix")

    matched = PathRuleMatcher.matching_rules(event_for(@admin_segment, @users_segment))

    assert_includes matched, admin_prefix, "Should include prefix rule"
    assert_includes matched, admin_users_exact, "Should include exact rule"
    refute_includes matched, api_prefix, "Should not include non-matching rule"
  end

  test "evaluate returns first matching action" do
    path_rule("/admin", "prefix")

    verdict = PathRuleMatcher.evaluate(event_for(@admin_segment, @dashboard_segment))

    assert_equal "deny", verdict, "Should return deny action"
  end

  test "evaluate returns allow for non-matching event" do
    path_rule("/admin", "exact")

    verdict = PathRuleMatcher.evaluate(event_for(@api_segment))

    assert_equal "allow", verdict, "Should return allow for non-matching event"
  end

  test "does not match disabled rules" do
    path_rule("/admin", "exact").update!(enabled: false)

    matched = PathRuleMatcher.matching_rules(event_for(@admin_segment))

    assert_empty matched, "Should not match disabled rules"
  end

  test "does not match expired rules" do
    path_rule("/admin", "exact").update!(expires_at: 1.hour.ago)

    matched = PathRuleMatcher.matching_rules(event_for(@admin_segment))

    assert_empty matched, "Should not match expired rules"
  end

  private

  # Creates a path-pattern rule owned by @user; the action defaults to "deny".
  def path_rule(pattern, match_type, action: "deny")
    Rule.create_path_pattern_rule(
      pattern: pattern,
      match_type: match_type,
      action: action,
      user: @user
    )
  end

  # Builds an event from PathSegment records, passing their ids in the given order.
  def event_for(*segments)
    create_event_with_segments(segments.map(&:id))
  end

  # Instantiates an Event via Event.new (nothing is saved here) with a random
  # request id, the current time, a fixed IP address and the given segment ids.
  def create_event_with_segments(segment_ids)
    Event.new(
      request_id: SecureRandom.uuid,
      timestamp: Time.current,
      request_segment_ids: segment_ids,
      ip_address: "1.2.3.4"
    )
  end

  # Thin wrapper around PathSegment.find_or_create_segment.
  def create_segment(text)
    PathSegment.find_or_create_segment(text)
  end
end
|
||||||
Reference in New Issue
Block a user