From 886a387740eb2b2f0d9d20daf481950f448935c4 Mon Sep 17 00:00:00 2001
From: Alexander Olofsson
Date: Tue, 17 Dec 2024 15:59:04 +0100
Subject: [PATCH] Add stub for SB story metadata extraction

---
 .../backends/spacebattles/backend.rb          | 43 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/lib/fic_tracker/backends/spacebattles/backend.rb b/lib/fic_tracker/backends/spacebattles/backend.rb
index 36c4807..cecf7a3 100644
--- a/lib/fic_tracker/backends/spacebattles/backend.rb
+++ b/lib/fic_tracker/backends/spacebattles/backend.rb
@@ -123,9 +123,48 @@ module FicTracker::Backends::Spacebattles
     private
 
+    # Extracts story metadata from a thread page.
+    #
+    # NOTE(review): `client.BASE_URL` parses as a method call, not a constant
+    # lookup (`client.class::BASE_URL`?), and `url:` needs a `url` method in
+    # scope; neither is visible in this patch - confirm both resolve.
+    #
+    # @param page parsed thread document, queried via at_css/css
+    # @return [Hash] story attributes with nil values removed
     def extract_story(page)
+      header = page.at_css('.p-body-header')
+      threadmark_header = page.at_css('.threadmarkListingHeader-content')
+
+      authors = header.at_css('.p-description').css('.username').map do |user|
+        {
+          slug: user[:'data-user-id'],
+          name: user.text.strip,
+          url: File.join(client.BASE_URL, user[:href]),
+        }.compact
+      end
+      tags = header.css('.p-description dl.tagList a.tagItem').map do |tag|
+        {
+          name: tag.text,
+          category: tag.at_css('i')[:title].downcase.to_sym,
+        }
+      end
+
+      header_blocks = threadmark_header.css('dl').to_h do |block|
+        [block.at_css('dt').text.strip.downcase.to_sym, block.at_css('dd')]
+      end
+      edited = threadmark_header.at_css('dl.message-lastEdit')
+      updated_at = Time.parse(edited.at_css('time')[:datetime]) if edited
 
       {
-      }
+        name: header.at_css('.p-title').text.strip,
+        authors:,
+        synopsis: threadmark_header.at_css('.threadmarkListingHeader-extraInfo > article > div').children.to_xml.strip,
+        url:,
+        chapter_count: header_blocks[:threadmarks].text.to_i,
+        completed: header_blocks[:status].text != 'Ongoing',
+        published_at: Time.parse(header_blocks[:created].at_css('time')[:datetime]),
+        updated_at:,
+        tags: FicTracker::Models::Light::Tag.load(tags),
+      }.compact
     end
 
     def extract_chapter(post)
@@ -140,7 +179,7 @@ module FicTracker::Backends::Spacebattles
 
         name: threadmark.text,
         url: post[:itemid],
-        published_at: Time.at(latest['data-time'].to_i),
+        published_at: Time.at(latest[:'data-time'].to_i),
         last_refresh: Time.now,
         content: html,