Fix some blocked/allow laggards after migrating. Add DuckDB for outstanding analytics performance. Start adding an import for all bot networks.

This commit is contained in:
Dan Milne
2025-11-18 16:40:05 +11:00
parent ef56779584
commit 3f274c842c
37 changed files with 3522 additions and 151 deletions

View File

@@ -0,0 +1,171 @@
<% content_for :title, "Bot Network Ranges" %>
<div class="max-w-7xl mx-auto px-4 py-8">
<!-- Header -->
<div class="mb-8">
<h1 class="text-3xl font-bold text-gray-900 mb-2">Bot Network Ranges</h1>
<p class="text-gray-600">Import and manage official network ranges for search crawlers and API bots</p>
</div>
<!-- Available Sources -->
<%# Renders one card per entry of @bot_sources (iterated as key/source pairs). %>
<%# Each source is read for :name, :description and :url; when :url is falsy the badge %>
<%# reads "Manual" and both import buttons are rendered disabled. %>
<div class="bg-white shadow rounded-lg mb-8">
  <div class="px-6 py-4 border-b border-gray-200">
    <h2 class="text-lg font-semibold text-gray-900">Available Sources</h2>
  </div>
  <div class="p-6">
    <div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6">
      <% @bot_sources.each do |key, source| %>
        <div class="border rounded-lg p-4 hover:bg-gray-50 transition-colors">
          <div class="flex items-start justify-between mb-2">
            <h3 class="font-medium text-gray-900"><%= source[:name] %></h3>
            <span class="px-2 py-1 text-xs font-medium rounded-full <%= source[:url] ? 'bg-green-100 text-green-800' : 'bg-yellow-100 text-yellow-800' %>">
              <%= source[:url] ? 'Available' : 'Manual' %>
            </span>
          </div>
          <p class="text-sm text-gray-600 mb-4"><%= source[:description] %></p>
          <div class="flex flex-wrap gap-2">
            <%# id: nil drops the default id="source" from the hidden field; without it every %>
            <%# form in this loop would emit the same DOM id, which is invalid HTML. %>
            <%# Synchronous import: POSTs the source key to import_bot_network_ranges_path. %>
            <%= form_with url: import_bot_network_ranges_path, method: :post, class: "inline" do |f| %>
              <%= hidden_field_tag :source, key, id: nil %>
              <%= f.submit "Import Now",
                    class: "px-3 py-1 text-xs font-medium text-white bg-blue-600 rounded hover:bg-blue-700 transition-colors disabled:opacity-50",
                    disabled: !source[:url] %>
            <% end %>
            <%# Same payload, posted to the async import route instead. %>
            <%= form_with url: import_async_bot_network_ranges_path, method: :post, class: "inline" do |f| %>
              <%= hidden_field_tag :source, key, id: nil %>
              <%= f.submit "Import Async",
                    class: "px-3 py-1 text-xs font-medium text-white bg-purple-600 rounded hover:bg-purple-700 transition-colors disabled:opacity-50",
                    disabled: !source[:url] %>
            <% end %>
            <%= link_to "View", bot_network_range_path(key),
                  class: "px-3 py-1 text-xs font-medium text-gray-700 bg-gray-200 rounded hover:bg-gray-300 transition-colors" %>
          </div>
        </div>
      <% end %>
    </div>
  </div>
</div>
<!-- Batch Import -->
<%# A single form that POSTs to import_all_bot_network_ranges_path. %>
<div class="bg-white shadow rounded-lg mb-8">
  <div class="px-6 py-4 border-b border-gray-200">
    <h2 class="text-lg font-semibold text-gray-900">Batch Import</h2>
  </div>
  <div class="p-6">
    <p class="text-gray-600 mb-4">Import from all available sources (this may take several minutes).</p>
    <%# The prompt is attached as data-turbo-confirm on the form. A bare `confirm:` option on %>
    <%# f.submit is only emitted as an inert confirm="..." HTML attribute, so no dialog appears. %>
    <%= form_with url: import_all_bot_network_ranges_path, method: :post,
          data: { turbo_confirm: "This will import from all available sources and may take several minutes. Continue?" } do |f| %>
      <div class="flex items-center gap-4">
        <%= f.submit "Import All Sources",
              class: "px-6 py-2 font-medium text-white bg-green-600 rounded hover:bg-green-700 transition-colors" %>
      </div>
    <% end %>
  </div>
</div>
<!-- Recent Imports -->
<%# Table of @recent_imports; the whole card is skipped when the collection has no entries. %>
<% if @recent_imports.any? %>
  <div class="bg-white shadow rounded-lg mb-8">
    <div class="px-6 py-4 border-b border-gray-200">
      <h2 class="text-lg font-semibold text-gray-900">Recent Imports</h2>
    </div>
    <div class="overflow-x-auto">
      <table class="min-w-full divide-y divide-gray-200">
        <thead class="bg-gray-50">
          <tr>
            <% %w[Source Status Records Date Notes].each do |heading| %>
              <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider"><%= heading %></th>
            <% end %>
          </tr>
        </thead>
        <tbody class="bg-white divide-y divide-gray-200">
          <% @recent_imports.each do |import| %>
            <%# Only the 'completed' status gets the green badge; every other status is yellow. %>
            <% badge_colours = import.status == 'completed' ? 'bg-green-100 text-green-800' : 'bg-yellow-100 text-yellow-800' %>
            <tr>
              <td class="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
                <%= import.source.titleize %>
              </td>
              <td class="px-6 py-4 whitespace-nowrap">
                <span class="px-2 inline-flex text-xs leading-5 font-semibold rounded-full <%= badge_colours %>">
                  <%= import.status.titleize %>
                </span>
              </td>
              <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                <%# A missing record count is shown as 0. %>
                <%= import.records_processed || 0 %>
              </td>
              <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                <%= import.created_at.strftime('%Y-%m-%d %H:%M') %>
              </td>
              <td class="px-6 py-4 text-sm text-gray-500">
                <%= import.notes %>
              </td>
            </tr>
          <% end %>
        </tbody>
      </table>
    </div>
  </div>
<% end %>
<!-- Recent Bot Network Ranges -->
<%# Table of @bot_network_ranges; the whole card is skipped when the collection has no entries. %>
<% if @bot_network_ranges.any? %>
  <div class="bg-white shadow rounded-lg">
    <div class="px-6 py-4 border-b border-gray-200">
      <h2 class="text-lg font-semibold text-gray-900">Recently Imported Bot Ranges</h2>
    </div>
    <div class="overflow-x-auto">
      <table class="min-w-full divide-y divide-gray-200">
        <thead class="bg-gray-50">
          <tr>
            <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Network</th>
            <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Source</th>
            <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Company</th>
            <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Created</th>
            <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Details</th>
          </tr>
        </thead>
        <tbody class="bg-white divide-y divide-gray-200">
          <% @bot_network_ranges.each do |range| %>
            <tr>
              <td class="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
                <%= range.network %>
              </td>
              <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                <%# The stored source carries a 'bot_import_' marker that is stripped for display. %>
                <%= range.source.gsub('bot_import_', '').titleize %>
              </td>
              <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                <%= range.company || 'Unknown' %>
              </td>
              <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                <%= range.created_at.strftime('%Y-%m-%d %H:%M') %>
              </td>
              <td class="px-6 py-4 text-sm text-gray-500">
                <% if range.additional_data.present? %>
                  <%# Accepts a JSON string or an already-decoded Hash (the column type is not %>
                  <%# visible from this template). Only JSON syntax errors are rescued, and any %>
                  <%# value that is not a JSON object is treated as "no details" so the lookups %>
                  <%# below cannot raise. %>
                  <%
                    raw_details = range.additional_data
                    data =
                      if raw_details.is_a?(Hash)
                        raw_details
                      else
                        begin
                          JSON.parse(raw_details.to_s)
                        rescue JSON::ParserError
                          {}
                        end
                      end
                    data = {} unless data.is_a?(Hash)
                  %>
                  <% if data['crawler_type'].present? %>
                    <span class="px-2 py-1 text-xs font-medium rounded bg-blue-100 text-blue-800">
                      <%# to_s guards titleize against non-string JSON values. %>
                      <%= data['crawler_type'].to_s.titleize %>
                    </span>
                  <% end %>
                  <% if data['aws_service'].present? %>
                    <span class="px-2 py-1 text-xs font-medium rounded bg-orange-100 text-orange-800">
                      <%= data['aws_service'] %>
                    </span>
                  <% end %>
                <% end %>
              </td>
            </tr>
          <% end %>
        </tbody>
      </table>
    </div>
  </div>
<% end %>
</div>
<!-- Real-time updates via Turbo Streams -->
<%# NOTE(review): this is a hand-written <turbo-stream-source> pointing at "/cable" with a %>
<%# `channel` attribute. turbo-rails pages normally subscribe through the `turbo_stream_from` %>
<%# helper, which emits a signed stream name - confirm this element actually receives the %>
<%# broadcasts, and which stream name the server side uses, before relying on it. %>
<turbo-stream-source src="/cable" channel="BotImportsChannel"></turbo-stream-source>

View File

@@ -0,0 +1,175 @@
<% content_for :title, "#{@source_name} Network Ranges" %>
<div class="max-w-7xl mx-auto px-4 py-8">
<!-- Header -->
<%# Page heading for @source_name, a link back to the source list, and a form that sends %>
<%# DELETE to bot_network_range_path for the source named in params[:source]. %>
<div class="mb-8">
  <div class="flex items-center justify-between">
    <div>
      <h1 class="text-3xl font-bold text-gray-900 mb-2"><%= @source_name %> Network Ranges</h1>
      <p class="text-gray-600">Network ranges imported from <%= @source_name %> official sources</p>
    </div>
    <div class="flex space-x-3">
      <%= link_to "Back to Sources", bot_network_ranges_path,
            class: "px-4 py-2 text-sm font-medium text-gray-700 bg-gray-200 rounded hover:bg-gray-300 transition-colors" %>
      <%# The prompt is attached as data-turbo-confirm on the form. A bare `confirm:` option on %>
      <%# f.submit is only emitted as an inert confirm="..." HTML attribute, which would let %>
      <%# this destructive action run without asking. %>
      <%= form_with url: bot_network_range_path(params[:source]), method: :delete, class: "inline",
            data: { turbo_confirm: "Are you sure you want to delete all #{@source_name} network ranges? This action cannot be undone." } do |f| %>
        <%= f.submit "Delete All Ranges",
              class: "px-4 py-2 text-sm font-medium text-white bg-red-600 rounded hover:bg-red-700 transition-colors" %>
      <% end %>
    </div>
  </div>
</div>
<!-- Statistics -->
<%# One tile per (source, count) pair in @import_stats; nothing is rendered when it is empty. %>
<% if @import_stats.any? %>
  <div class="bg-white shadow rounded-lg mb-8">
    <div class="px-6 py-4 border-b border-gray-200">
      <h2 class="text-lg font-semibold text-gray-900">Import Statistics</h2>
    </div>
    <div class="p-6">
      <div class="grid grid-cols-1 md:grid-cols-3 gap-6">
        <% @import_stats.each do |import_source, range_count| %>
          <%# Strip the 'bot_import_' marker so only the readable source name is shown. %>
          <% source_label = import_source.gsub('bot_import_', '').titleize %>
          <div class="text-center">
            <div class="text-3xl font-bold text-blue-600"><%= range_count %></div>
            <div class="text-sm text-gray-600 mt-1"><%= source_label %></div>
          </div>
        <% end %>
      </div>
    </div>
  </div>
<% end %>
<!-- Network Ranges Table -->
<div class="bg-white shadow rounded-lg">
<%# Card header: title plus a "Showing X to Y of Z" summary for the current page. %>
<div class="px-6 py-4 border-b border-gray-200">
  <div class="flex items-center justify-between">
    <h2 class="text-lg font-semibold text-gray-900">Network Ranges</h2>
    <%# Rows actually present on this page. `length` is used because any relation or array %>
    <%# responds to it, so the summary does not depend on a pagination-specific counter. %>
    <% shown_count = @network_ranges.length %>
    <% total_ranges = @network_ranges.total_count %>
    <div class="text-sm text-gray-500">
      <% if shown_count.zero? %>
        <%# Avoids the nonsensical "Showing 1 to 0" on an empty or out-of-range page. %>
        Showing 0 of <%= total_ranges %> ranges
      <% else %>
        <% first_shown = @network_ranges.offset_value + 1 %>
        <% last_shown = [@network_ranges.offset_value + shown_count, total_ranges].min %>
        Showing <%= first_shown %> to <%= last_shown %>
        of <%= total_ranges %> ranges
      <% end %>
    </div>
  </div>
</div>
<%# One row per record in @network_ranges; the network cell links to network_range_path. %>
<div class="overflow-x-auto">
  <table class="min-w-full divide-y divide-gray-200">
    <thead class="bg-gray-50">
      <tr>
        <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Network</th>
        <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Source</th>
        <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Company</th>
        <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Country</th>
        <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Created</th>
        <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Details</th>
      </tr>
    </thead>
    <tbody class="bg-white divide-y divide-gray-200">
      <% @network_ranges.each do |range| %>
        <tr class="hover:bg-gray-50">
          <td class="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
            <%= link_to range.network, network_range_path(range), class: "text-blue-600 hover:text-blue-800" %>
          </td>
          <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
            <%# The stored source carries a 'bot_import_' marker that is stripped for display. %>
            <%= range.source.gsub('bot_import_', '').titleize %>
          </td>
          <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
            <%= range.company || 'Unknown' %>
          </td>
          <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
            <%= range.country || 'Unknown' %>
          </td>
          <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
            <%= range.created_at.strftime('%Y-%m-%d %H:%M') %>
          </td>
          <td class="px-6 py-4 text-sm text-gray-500">
            <% if range.additional_data.present? %>
              <%# Accepts a JSON string or an already-decoded Hash (the column type is not %>
              <%# visible from this template). Only JSON syntax errors are rescued, and any %>
              <%# value that is not a JSON object is treated as "no details" so the lookups %>
              <%# below cannot raise. %>
              <%
                raw_details = range.additional_data
                data =
                  if raw_details.is_a?(Hash)
                    raw_details
                  else
                    begin
                      JSON.parse(raw_details.to_s)
                    rescue JSON::ParserError
                      {}
                    end
                  end
                data = {} unless data.is_a?(Hash)
              %>
              <div class="flex flex-wrap gap-1">
                <% if data['crawler_type'].present? %>
                  <span class="px-2 py-1 text-xs font-medium rounded bg-blue-100 text-blue-800">
                    <%# to_s guards titleize against non-string JSON values. %>
                    <%= data['crawler_type'].to_s.titleize %>
                  </span>
                <% end %>
                <% if data['crawler_purpose'].present? %>
                  <%# The purpose text is only exposed as a hover tooltip on a fixed label. %>
                  <span class="px-2 py-1 text-xs font-medium rounded bg-purple-100 text-purple-800" title="<%= data['crawler_purpose'] %>">
                    Purpose
                  </span>
                <% end %>
                <% if data['aws_service'].present? %>
                  <span class="px-2 py-1 text-xs font-medium rounded bg-orange-100 text-orange-800">
                    <%= data['aws_service'] %>
                  </span>
                <% end %>
                <% if data['aws_region'].present? %>
                  <span class="px-2 py-1 text-xs font-medium rounded bg-green-100 text-green-800">
                    <%= data['aws_region'] %>
                  </span>
                <% end %>
                <% if data['ip_version'].present? %>
                  <span class="px-2 py-1 text-xs font-medium rounded bg-gray-100 text-gray-800">
                    IPv<%= data['ip_version'] %>
                  </span>
                <% end %>
              </div>
            <% end %>
          </td>
        </tr>
      <% end %>
    </tbody>
  </table>
</div>
<!-- Pagination -->
<%# Pager for @network_ranges, rendered only when there is more than one page. %>
<% page_count = @network_ranges.total_pages %>
<% if page_count > 1 %>
  <% active_page = @network_ranges.current_page %>
  <% previous_page = @network_ranges.prev_page %>
  <% following_page = @network_ranges.next_page %>
  <% nav_link_classes = "px-3 py-1 text-sm font-medium text-gray-700 bg-white border border-gray-300 rounded hover:bg-gray-50" %>
  <%# Page window: the first page, the last page, and pages within two of the current one. %>
  <% windowed_pages = (1..page_count).select { |number| number == 1 || number == page_count || (number - active_page).abs <= 2 } %>
  <div class="px-6 py-4 border-t border-gray-200">
    <div class="flex items-center justify-between">
      <div class="text-sm text-gray-700">
        Page <%= active_page %> of <%= page_count %>
      </div>
      <div class="flex space-x-2">
        <% if previous_page %>
          <%= link_to "Previous", bot_network_range_path(params[:source], page: previous_page), class: nav_link_classes %>
        <% end %>
        <% windowed_pages.each do |number| %>
          <% if number == active_page %>
            <%# The current page is a highlighted label rather than a link. %>
            <span class="px-3 py-1 text-sm font-medium text-white bg-blue-600 rounded">
              <%= number %>
            </span>
          <% else %>
            <%= link_to number, bot_network_range_path(params[:source], page: number), class: nav_link_classes %>
          <% end %>
        <% end %>
        <% if following_page %>
          <%= link_to "Next", bot_network_range_path(params[:source], page: following_page), class: nav_link_classes %>
        <% end %>
      </div>
    </div>
  </div>
<% end %>
</div>
<%# Empty state: shown when @network_ranges has no records, with a link to the source list. %>
<% if @network_ranges.empty? %>
  <div class="bg-white shadow rounded-lg">
    <div class="px-6 py-12 text-center">
      <div class="text-gray-400 mb-4">
        <svg class="mx-auto h-12 w-12 text-gray-400" fill="none" viewBox="0 0 24 24" stroke="currentColor">
          <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z" />
        </svg>
      </div>
      <h3 class="text-lg font-medium text-gray-900 mb-2">No network ranges found</h3>
      <p class="text-gray-600 mb-6">
        No <%= @source_name %> network ranges have been imported yet.
      </p>
      <% import_link_classes = "inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md shadow-sm text-white bg-blue-600 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500" %>
      <%= link_to bot_network_ranges_path, class: import_link_classes do %>Import <%= @source_name %> Ranges<% end %>
    </div>
  </div>
<% end %>
</div>