From 8dcb61154e4bd987028d5e9a2d40113f8dcd63c5 Mon Sep 17 00:00:00 2001
From: Alexander Olofsson
Date: Fri, 18 Oct 2024 07:32:54 +0200
Subject: [PATCH] Add stub for SpaceBattles

---
 .../backends/spacebattles/backend.rb          | 208 ++++++++++++++++++
 .../backends/spacebattles/client.rb           | 146 +++++++++++++
 2 files changed, 354 insertions(+)
 create mode 100644 lib/fic_tracker/backends/spacebattles/backend.rb
 create mode 100644 lib/fic_tracker/backends/spacebattles/client.rb

diff --git a/lib/fic_tracker/backends/spacebattles/backend.rb b/lib/fic_tracker/backends/spacebattles/backend.rb
new file mode 100644
index 0000000..36c4807
--- /dev/null
+++ b/lib/fic_tracker/backends/spacebattles/backend.rb
@@ -0,0 +1,208 @@
+# frozen_string_literal: true
+
+require 'digest'
+
+require_relative 'client'
+
+module FicTracker::Backends::Spacebattles
+  # Backend that scrapes authors, stories and chapters from the SpaceBattles
+  # forums through {Client}.
+  class Backend < FicTracker::Backend
+    # @return [Client] the memoized HTTP client
+    def client
+      @client ||= Client.new
+    end
+
+    # @return [String] display name of the backend
+    def full_name
+      'SpaceBattles'
+    end
+
+    # @return [String] base URL of the forum
+    def url
+      Client::BASE_URL
+    end
+
+    # @param story [#to_s] a URL or slug
+    # @return [Boolean] whether the value starts with the forum base URL
+    def self.supports_url?(story)
+      story.to_s.start_with? Client::BASE_URL
+    end
+
+    # Loads the member page of an author and stores its metadata.
+    #
+    # @param author [FicTracker::Models::Author, #to_s] author model, slug or member URL
+    # @return the result of +author.set+
+    def load_author(author)
+      author = FicTracker::Models::Author.new(slug: parse_slug(author), backend: self) unless author.is_a? FicTracker::Models::Author
+
+      path = "/members/#{author.slug}"
+      page = client.request(path)
+
+      author.set(
+        name: page.at_css('.memberHeader-name').text,
+        # Store the absolute URL rather than the bare request path
+        url: URI.join(url, path).to_s,
+        # Safe navigation: the avatar img lookup may come back empty
+        image: page.at_css('.memberHeader-avatar img')&.[](:src),
+        last_metadata_refresh: Time.now,
+      )
+    end
+
+    # Loads story metadata from the reader page of a thread.
+    #
+    # @param story [FicTracker::Models::Story, #to_s] story model, slug or thread URL
+    # @return the result of +story.set+
+    def load_story(story)
+      story = FicTracker::Models::Story.new(slug: parse_slug(story), backend: self) unless story.is_a? FicTracker::Models::Story
+
+      logger.info "Loading story #{story.slug}"
+      page = client.request("/threads/#{story.slug}/reader")
+      attrs = extract_story(page)
+      story.set(
+        last_metadata_refresh: Time.now,
+        **attrs
+      )
+    end
+
+    # Collects chapter entries (without content) from the threadmark listing.
+    #
+    # @param story [FicTracker::Models::Story, #to_s] story model, slug or thread URL
+    # @return the result of +story.set+
+    def find_chapters(story)
+      story = FicTracker::Models::Story.new(slug: parse_slug(story), backend: self) unless story.is_a? FicTracker::Models::Story
+
+      chapters = []
+      client.paginated :query do
+        marks = client.request("/threads/#{story.slug}/threadmarks")
+        next unless marks
+
+        marks.at_css('div.block-body--threadmarkBody')&.css('div.structItem')&.each do |item|
+          main = item.at_css('.structItem-cell--main a')
+          latest = item.at_css('.structItem-cell--main time')
+
+          slug = main[:href].split('-').last
+
+          chapters << {
+            slug:,
+            # Count across pages; a per-page counter would restart at 0 on each page
+            index: chapters.size,
+            name: main.text,
+            url: URI.join(url, "/posts/#{slug}/"),
+            published_at: Time.at(latest['data-time'].to_i),
+          }
+        end
+
+        # Return the whole page so the client can look for the next-page link
+        marks
+      end
+
+      story.chapters = chapters
+      story.set(
+        last_content_refresh: Time.now,
+      )
+    end
+
+    # Loads story metadata and every threadmarked post, page by page.
+    #
+    # @param story [FicTracker::Models::Story, #to_s] story model, slug or thread URL
+    # @return the result of +story.set+
+    def load_full_story(story)
+      story = FicTracker::Models::Story.new(slug: parse_slug(story), backend: self) unless story.is_a? FicTracker::Models::Story
+
+      # Default to an empty Hash: splatting nil raises before Ruby 3.4
+      attrs = {}
+      chapters = []
+      client.paginated :path do
+        page = client.request("/threads/#{story.slug}/reader")
+        next unless page
+
+        attrs = extract_story(page) if page.at_css('.threadmarkListingHeader-content')
+
+        page.css('article.hasThreadmark').each do |post|
+          chapters << extract_chapter(post)
+        end
+
+        # Return the whole page so the client can look for the next-page link
+        page
+      end
+
+      story.chapters = chapters
+      story.set(
+        last_metadata_refresh: Time.now,
+        last_content_refresh: Time.now,
+        **attrs
+      )
+    end
+
+    # Loads a single chapter from its post inside the thread.
+    #
+    # @param chapter [FicTracker::Models::Chapter, #to_s] chapter model or slug
+    # @param story [FicTracker::Models::Story, #to_s] story model, slug or thread URL
+    # @return the result of +chapter.set+
+    # @raise [RuntimeError] when the post is not present on the fetched page
+    def load_chapter(chapter, story)
+      story = load_story(story) unless story.is_a? FicTracker::Models::Story
+      chapter = FicTracker::Models::Chapter.new(slug: parse_slug(chapter), story: story) unless chapter.is_a? FicTracker::Models::Chapter
+
+      page = client.request "/threads/#{story.slug}/#{chapter.slug}"
+      content_id = chapter.slug.split('#').last
+      # data-content is an attribute and needs "@"; a bare name selects child elements
+      post = page&.at_xpath("//article[@data-content=\"#{content_id}\"]")
+      raise "Unable to find post #{content_id} in thread #{story.slug}" unless post
+
+      chapter.set(**extract_chapter(post))
+    end
+
+    # Reduces a member or thread URL on this forum to its slug.
+    #
+    # @param slug [#to_s] a bare slug or a URL
+    # @return [String] the path segment following "members" or "threads", or
+    #   the input as a String when it is not such a URL
+    def parse_slug(slug)
+      slug = slug.to_s
+      return slug unless slug.start_with?(url)
+
+      # The remainder begins with "/", so discard the empty leading segment
+      type, *components = slug.delete_prefix(url).split('/').reject(&:empty?)
+      return components.first if %w[members threads].include?(type) && components.any?
+
+      slug
+    end
+
+    private
+
+    # Placeholder: no story metadata is extracted yet.
+    #
+    # @param _page the parsed reader page (unused)
+    # @return [Hash] always empty
+    def extract_story(_page)
+      {}
+    end
+
+    # Builds chapter attributes from a threadmarked post.
+    #
+    # @param post the article node of the post
+    # @return [Hash] chapter attributes; +etag+ is the SHA1 of the post HTML
+    def extract_chapter(post)
+      threadmark = post.at_css('span.threadmarkLabel')
+      link = post.at_css('header ul.message-attribution-main a')
+      latest = link.at_css('time')
+
+      html = post.at_css('.message-content article .bbWrapper').to_xml
+
+      {
+        slug: post[:itemid].split('/').last,
+
+        name: threadmark.text,
+        url: post[:itemid],
+        published_at: Time.at(latest['data-time'].to_i),
+        last_refresh: Time.now,
+
+        content: html,
+        content_type: 'text/html',
+        etag: Digest::SHA1.hexdigest(html),
+      }
+    end
+  end
+end
diff --git a/lib/fic_tracker/backends/spacebattles/client.rb b/lib/fic_tracker/backends/spacebattles/client.rb
new file mode 100644
index 0000000..21e3d4f
--- /dev/null
+++ b/lib/fic_tracker/backends/spacebattles/client.rb
@@ -0,0 +1,146 @@
+# frozen_string_literal: true
+
+require 'net/http'
+require 'nokogiri'
+require 'time'
+
+module FicTracker::Backends::Spacebattles
+  # Small HTTP client for the SpaceBattles forums. Returns parsed HTML and
+  # applies page parameters to requests made inside {#paginated}.
+  class Client
+    BASE_URL = 'https://forums.spacebattles.com'
+    # Upper bound on redirects followed for a single request
+    MAX_REDIRECTS = 10
+
+    def initialize
+      @pagination_query = nil
+      @pagination_path = nil
+    end
+
+    # @return [URI] the parsed base URL
+    def url
+      URI(BASE_URL)
+    end
+
+    # Requests a page, following redirects and waiting out rate limiting.
+    #
+    # @param path [String] path relative to the base URL
+    # @param query [Hash, nil] query parameters
+    # @param method [Symbol] HTTP method name, e.g. +:get+
+    # @return [Nokogiri::HTML4::Document, nil] parsed body, nil when there is no body
+    # @raise [Net::HTTPExceptions] from +resp.value+ when the final response is not 2xx
+    # @raise [RuntimeError] after more than MAX_REDIRECTS redirects
+    def request(path, query: nil, method: :get)
+      path = File.join(path, @pagination_path) if @pagination_path
+      uri = URI.join(url, path)
+      # query defaults to nil, which Hash#merge does not accept
+      query = @pagination_query.merge(query || {}) if @pagination_query
+      uri.query = URI.encode_www_form(query) if query
+
+      req = Net::HTTP.const_get(method.to_s.capitalize.to_sym).new uri.request_uri
+      req['User-Agent'] = "FicTracker/#{FicTracker::VERSION}"
+      req['Accept'] = 'text/html'
+
+      resp = nil
+      redirects = 0
+      http.start do
+        loop do
+          debug_http(req)
+          resp = http.request req
+          debug_http(resp)
+          case resp
+          when Net::HTTPRedirection
+            redirects += 1
+            raise "Too many redirects for #{path}" if redirects > MAX_REDIRECTS
+
+            uri = URI.join(url, resp['location'])
+            uri.query = URI.encode_www_form(query) if query
+            req.path.replace uri.request_uri
+          when Net::HTTPTooManyRequests
+            wait_time = retry_time(resp['retry-after'])
+
+            logger.info "Rate limited, waiting until #{wait_time} before retrying"
+            # The target may already have passed, and sleep rejects negative durations
+            sleep [wait_time - Time.now, 0].max
+          else
+            break
+          end
+        end
+      end
+
+      resp.value
+      # Guard against a nil body as well as an empty one
+      return if resp.body.nil? || resp.body.empty?
+
+      Nokogiri::HTML4.parse(resp.body)
+    end
+
+    # Runs the block once per page. Requests made inside the block carry the
+    # current page, either as query parameters or as a "page-N" path suffix.
+    #
+    # @param style [Symbol] +:query+ or +:path+
+    # @param per_page [Integer] page size, sent with the +:query+ style only
+    # @yieldreturn [#at_css, nil] the parsed page, checked for a next-page link
+    # @return the value returned by the last block call
+    # @raise [RuntimeError] for any other style
+    def paginated(style, per_page: 200, &_block)
+      raise 'Style must be :path or :query' unless %i[path query].include? style
+
+      @pagination_query = { per_page: per_page } if style == :query
+      @pagination_path = '' if style == :path
+      page = 1
+
+      loop do
+        result = yield
+
+        # Stop on a missing page as well as on a page without a next link
+        return result unless result&.at_css('nav.pageNavWrapper .pageNav-jump--next')
+
+        page += 1
+        @pagination_query[:page] = page if style == :query
+        @pagination_path = "page-#{page}" if style == :path
+      end
+    ensure
+      @pagination_query = nil
+      @pagination_path = nil
+    end
+
+    private
+
+    def logger
+      Logging.logger[self]
+    end
+
+    def http
+      @http ||= Net::HTTP.new(url.host, url.port).tap { |http| http.use_ssl = url.scheme == 'https' }
+    end
+
+    # Works out when a rate-limited request may be retried.
+    #
+    # @param header [String, nil] value of the Retry-After header
+    # @return [Time] 10 seconds from now without a header, N seconds from now
+    #   for a numeric header, otherwise the header parsed as a timestamp
+    def retry_time(header)
+      return Time.now + 10 unless header
+      return Time.now + header.to_i if header.match?(/\A[0-9]+\z/)
+
+      Time.parse(header)
+    end
+
+    # Logs a request or response with its headers when debug logging is on.
+    def debug_http(object)
+      return unless logger.debug?
+
+      dir = '>'
+      if object.is_a?(Net::HTTPRequest)
+        dir = '<'
+
+        logger.debug "#{dir} #{object.method} #{object.path}"
+      else
+        logger.debug "#{dir} #{object.code} #{object.message}"
+      end
+
+      object.each_header { |h, v| logger.debug "#{dir} #{h}: #{v}" }
+    end
+  end
+end