Fix some blocked/allow laggards after migrating. Add DuckDB for outstanding analytics performance. Start adding an import for all bot networks

This commit is contained in:
Dan Milne
2025-11-18 16:40:05 +11:00
parent ef56779584
commit 3f274c842c
37 changed files with 3522 additions and 151 deletions

View File

@@ -0,0 +1,171 @@
<%# Index view for bot network ranges: header, available sources, batch import, recent imports, recent ranges. %>
<% page_title = "Bot Network Ranges" %>
<% content_for :title, page_title %>
<div class="max-w-7xl mx-auto px-4 py-8">
  <!-- Header -->
  <div class="mb-8">
    <%# The heading repeats the title handed to the layout via content_for. %>
    <h1 class="text-3xl font-bold text-gray-900 mb-2"><%= page_title %></h1>
    <p class="text-gray-600">Import and manage official network ranges for search crawlers and API bots</p>
  </div>
<!-- Available Sources -->
<%# One card per @bot_sources entry (key => hash read via :name, :description, :url). %>
<%# Each card offers a synchronous import, an async import and a link to the source's detail page. %>
<div class="bg-white shadow rounded-lg mb-8">
  <div class="px-6 py-4 border-b border-gray-200">
    <h2 class="text-lg font-semibold text-gray-900">Available Sources</h2>
  </div>
  <div class="p-6">
    <div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6">
      <% @bot_sources.each do |key, source| %>
        <%# A source can only be imported automatically when it has a non-blank URL. %>
        <% importable = source[:url].present? %>
        <div class="border rounded-lg p-4 hover:bg-gray-50 transition-colors">
          <div class="flex items-start justify-between mb-2">
            <h3 class="font-medium text-gray-900"><%= source[:name] %></h3>
            <span class="px-2 py-1 text-xs font-medium rounded-full <%= importable ? 'bg-green-100 text-green-800' : 'bg-yellow-100 text-yellow-800' %>">
              <%= importable ? 'Available' : 'Manual' %>
            </span>
          </div>
          <p class="text-sm text-gray-600 mb-4"><%= source[:description] %></p>
          <div class="flex flex-wrap gap-2">
            <%= form_with url: import_bot_network_ranges_path, method: :post, class: "inline" do |f| %>
              <%# id: nil — this field repeats in every card and form; the default id="source" would be duplicated in the DOM. %>
              <%= hidden_field_tag :source, key, id: nil %>
              <%= f.submit "Import Now",
                    class: "px-3 py-1 text-xs font-medium text-white bg-blue-600 rounded hover:bg-blue-700 transition-colors disabled:opacity-50",
                    disabled: !importable %>
            <% end %>
            <%= form_with url: import_async_bot_network_ranges_path, method: :post, class: "inline" do |f| %>
              <%= hidden_field_tag :source, key, id: nil %>
              <%= f.submit "Import Async",
                    class: "px-3 py-1 text-xs font-medium text-white bg-purple-600 rounded hover:bg-purple-700 transition-colors disabled:opacity-50",
                    disabled: !importable %>
            <% end %>
            <%= link_to "View", bot_network_range_path(key),
                  class: "px-3 py-1 text-xs font-medium text-gray-700 bg-gray-200 rounded hover:bg-gray-300 transition-colors" %>
          </div>
        </div>
      <% end %>
    </div>
  </div>
</div>
<!-- Batch Import -->
<%# Single form that POSTs to import_all_bot_network_ranges_path after the user confirms. %>
<div class="bg-white shadow rounded-lg mb-8">
  <div class="px-6 py-4 border-b border-gray-200">
    <h2 class="text-lg font-semibold text-gray-900">Batch Import</h2>
  </div>
  <div class="p-6">
    <p class="text-gray-600 mb-4">Import from all available sources (this may take several minutes).</p>
    <%# Turbo takes the prompt text from data-turbo-confirm; a bare confirm: option on the submit tag %>
    <%# is rendered as a plain HTML attribute and never shows a dialog. %>
    <%= form_with url: import_all_bot_network_ranges_path, method: :post,
          data: { turbo_confirm: "This will import from all available sources and may take several minutes. Continue?" } do |f| %>
      <div class="flex items-center gap-4">
        <%= f.submit "Import All Sources",
              class: "px-6 py-2 font-medium text-white bg-green-600 rounded hover:bg-green-700 transition-colors" %>
      </div>
    <% end %>
  </div>
</div>
<!-- Recent Imports -->
<%# Table of @recent_imports; the whole panel is skipped when the collection is empty. %>
<% if @recent_imports.any? %>
  <% import_heading_classes = "px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider" %>
  <div class="bg-white shadow rounded-lg mb-8">
    <div class="px-6 py-4 border-b border-gray-200">
      <h2 class="text-lg font-semibold text-gray-900">Recent Imports</h2>
    </div>
    <div class="overflow-x-auto">
      <table class="min-w-full divide-y divide-gray-200">
        <thead class="bg-gray-50">
          <tr>
            <% %w[Source Status Records Date Notes].each do |heading| %>
              <th class="<%= import_heading_classes %>"><%= heading %></th>
            <% end %>
          </tr>
        </thead>
        <tbody class="bg-white divide-y divide-gray-200">
          <% @recent_imports.each do |import| %>
            <%# Green badge for completed imports, yellow for every other status. %>
            <%
              status_badge =
                if import.status == 'completed'
                  'bg-green-100 text-green-800'
                else
                  'bg-yellow-100 text-yellow-800'
                end
              processed = import.records_processed
            %>
            <tr>
              <td class="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
                <%= import.source.titleize %>
              </td>
              <td class="px-6 py-4 whitespace-nowrap">
                <span class="px-2 inline-flex text-xs leading-5 font-semibold rounded-full <%= status_badge %>">
                  <%= import.status.titleize %>
                </span>
              </td>
              <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                <%# A missing count is shown as 0. %>
                <%= processed.nil? ? '0' : processed.to_s %>
              </td>
              <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                <%= import.created_at.strftime('%Y-%m-%d %H:%M') %>
              </td>
              <td class="px-6 py-4 text-sm text-gray-500">
                <%= import.notes %>
              </td>
            </tr>
          <% end %>
        </tbody>
      </table>
    </div>
  </div>
<% end %>
<!-- Recent Bot Network Ranges -->
<%# Table of @bot_network_ranges; the whole panel is skipped when the collection is empty. %>
<% if @bot_network_ranges.any? %>
  <div class="bg-white shadow rounded-lg">
    <div class="px-6 py-4 border-b border-gray-200">
      <h2 class="text-lg font-semibold text-gray-900">Recently Imported Bot Ranges</h2>
    </div>
    <div class="overflow-x-auto">
      <table class="min-w-full divide-y divide-gray-200">
        <thead class="bg-gray-50">
          <tr>
            <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Network</th>
            <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Source</th>
            <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Company</th>
            <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Created</th>
            <th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Details</th>
          </tr>
        </thead>
        <tbody class="bg-white divide-y divide-gray-200">
          <% @bot_network_ranges.each do |range| %>
            <%# Normalise additional_data to a Hash. It may already be a Hash or may be a JSON string; %>
            <%# unparseable input and non-object JSON (arrays, scalars) simply produce no badges. %>
            <%
              details = range.additional_data
              unless details.is_a?(Hash)
                details =
                  begin
                    parsed = details.present? ? JSON.parse(details) : {}
                    parsed.is_a?(Hash) ? parsed : {}
                  rescue JSON::ParserError, TypeError
                    {}
                  end
              end
            %>
            <tr>
              <td class="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
                <%= range.network %>
              </td>
              <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                <%# Strip the "bot_import_" marker so only the source name is shown. %>
                <%= range.source.to_s.gsub('bot_import_', '').titleize %>
              </td>
              <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                <%= range.company || 'Unknown' %>
              </td>
              <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
                <%= range.created_at.strftime('%Y-%m-%d %H:%M') %>
              </td>
              <td class="px-6 py-4 text-sm text-gray-500">
                <% if details['crawler_type'].present? %>
                  <span class="px-2 py-1 text-xs font-medium rounded bg-blue-100 text-blue-800">
                    <%= details['crawler_type'].to_s.titleize %>
                  </span>
                <% end %>
                <% if details['aws_service'].present? %>
                  <span class="px-2 py-1 text-xs font-medium rounded bg-orange-100 text-orange-800">
                    <%= details['aws_service'] %>
                  </span>
                <% end %>
              </td>
            </tr>
          <% end %>
        </tbody>
      </table>
    </div>
  </div>
<% end %>
</div>
<!-- Real-time updates via Turbo Streams -->
<%# NOTE(review): Turbo documents <turbo-stream-source> as opening a WebSocket/EventSource to `src`; %>
<%# a `channel` attribute appears to belong to turbo-rails' <turbo-cable-stream-source>, which is %>
<%# normally emitted by the turbo_stream_from helper. Confirm this tag actually receives BotImportsChannel updates. %>
<turbo-stream-source src="/cable" channel="BotImportsChannel"></turbo-stream-source>