require "json"
require "net/http"
require "time"
require "uri"

module Picopackage
  # Selects the provider that should be used to retrieve a package from a URL.
  class Provider
    # Walks PROVIDERS in order and returns an instance of the first one that
    # accepts the URL.
    #
    # A provider class answers `handles_url?` with:
    #   true   - it definitely handles the URL; an instance is returned
    #   false  - it does not; the next provider is tried
    #   :maybe - an instance is built and asked `handles_body?`
    #
    # @param url [String] the URL the package should be fetched from
    # @return [DefaultProvider, nil] a provider instance, or nil if none matched
    def self.for(url)
      PROVIDERS.each do |provider|
        case provider.handles_url?(url)
        when true
          return provider.new(url)
        when :maybe
          instance = provider.new(url)
          return instance if instance.handles_body?
        end
      end

      nil # Return nil if no provider found
    end
  end

  # Base class for fetching content from a URL.
  #
  # Vocabulary used by the providers:
  #   body         - the raw response retrieved from the URL
  #   package_data - payload + metadata; this is what would be written to a file
  #   payload      - the payload extracted from package_data
  #   metadata     - the metadata extracted from package_data
  #
  # The job of a provider is to fetch the body from the URL and extract the
  # package data (`content`) and the `filename` from it. Splitting the content
  # into payload and metadata is the job of the SourceFile class.
  class DefaultProvider
    MAX_SIZE = 1024 * 1024 # largest response body accepted, in bytes
    TIMEOUT = 10 # passed as both open_timeout and read_timeout

    attr_reader :url

    # The default provider cannot decide from the URL alone.
    def self.handles_url?(url) = :maybe

    # @param url [String] the user-supplied URL; stored after `transform_url`
    def initialize(url)
      @url = transform_url(url)
      @uri = URI(@url)
      @body = nil
      @json_body = nil
      @content = nil
    end

    # Memoized response body; the first call performs the HTTP request.
    def body = @body ||= fetch

    # Memoized `body` parsed as JSON.
    def json_body = @json_body ||= JSON.parse(body)

    # Hook for subclasses to rewrite the user-facing URL into the one to fetch.
    def transform_url(url) = url

    # Performs a GET request for the URL and returns the response body.
    #
    # @return [String] the response body (also stored for `body`)
    # @raise [RuntimeError] if the response is not a Net::HTTPSuccess
    # @raise [FileTooLargeError] if the body would exceed MAX_SIZE bytes
    def fetch
      # Accumulate into a local buffer so that a failed or oversized download
      # never leaves a truncated body memoized in @body.
      buffer = String.new(capacity: MAX_SIZE)

      Net::HTTP.start(
        @uri.host, @uri.port,
        use_ssl: @uri.scheme == "https",
        read_timeout: TIMEOUT,
        open_timeout: TIMEOUT
      ) do |http|
        # request_uri keeps the query string and yields "/" for an empty path.
        http.request_get(@uri.request_uri) do |response|
          raise "Unexpected response: #{response.code}" unless response.is_a?(Net::HTTPSuccess)

          response.read_body do |chunk|
            if buffer.bytesize + chunk.bytesize > MAX_SIZE
              raise FileTooLargeError, "Response would exceed #{MAX_SIZE} bytes"
            end

            buffer << chunk
          end
        end
      end

      @body = buffer
    end

    # The default provider accepts any body. Nothing is fetched here, so any
    # fetch error surfaces when `body` is first read.
    def handles_body?
      true
    end

    # Implement in subclass - this comes from the `body`.
    # Splitting content into payload and metadata is the job of the SourceFile class.
    def content = body

    # Implement in subclass - this should return the filename extracted from
    # the body, if it exists, but not from the metadata.
    #
    # The default takes the last segment of the URL path (so a query string is
    # not part of the name) and falls back to the basename of the whole URL
    # when the path has no usable segment.
    def filename
      name = File.basename(@uri.path.to_s)
      (name.empty? || name == "/") ? File.basename(@url) : name
    end

    # Implement in subclass when the source exposes a packaging timestamp.
    # nil values are dropped from the metadata built by `source_file`.
    def packaged_at = nil

    # Memoized SourceFile built from `content`, with filename, url and
    # packaged_at as metadata (nil entries removed).
    def source_file
      @source_file ||= SourceFile.from_content(
        content,
        metadata: {"filename" => filename, "url" => url, "packaged_at" => packaged_at}.compact
      )
    end
  end

  # Provider for gists hosted on gist.github.com, read through the GitHub API.
  class GithubGistProvider < DefaultProvider
    def self.handles_url?(url) = url.match?(%r{gist\.github\.com})

    # Content of the first file listed in the gist JSON.
    def content = json_body["files"].values.first["content"]

    # Filename of the first file listed in the gist JSON.
    def filename = json_body["files"].values.first["filename"]

    # Rewrites a gist page URL into the corresponding API URL.
    def transform_url(url)
      gist_id = url[/gist\.github\.com\/[^\/]+\/([a-f0-9]+)/, 1]
      "https://api.github.com/gists/#{gist_id}"
    end

    # @return [Time, nil] the parsed `created_at` field, or nil when it is
    #   missing (Time.parse raises TypeError on nil) or unparseable
    def packaged_at
      Time.parse(json_body["created_at"])
    rescue ArgumentError, TypeError
      nil
    end
  end

  # Provider that requests "<url>.json" and reads the first entry of its
  # "files" array.
  class OpenGistProvider < DefaultProvider
    # Must be a class method: Provider.for asks the class, not an instance.
    def self.handles_url?(url) = :maybe

    def transform_url(url) = "#{url}.json"

    def content = first_file_value("content")

    def filename = first_file_value("filename")

    # If we successfully fetch the body, and the body contains content and a
    # filename, then we can handle the body. A body that cannot be fetched, is
    # not JSON, or does not have the expected shape means "not handled" so that
    # Provider.for can move on to the next provider.
    def handles_body?
      !!(content && filename)
    rescue FileTooLargeError, Net::HTTPError, RuntimeError, JSON::ParserError, TypeError
      false
    end

    private

    # Reads `key` from the first entry of "files"; nil when the JSON document
    # is not an object.
    def first_file_value(key)
      json_body.dig("files", 0, key) if json_body.is_a?(Hash)
    end
  end

  PROVIDERS = [
    GithubGistProvider,
    OpenGistProvider,
    DefaultProvider
  ].freeze
end