fic_tracker/lib/fic_tracker/backends/ao3/backend.rb

236 lines
6.8 KiB
Ruby

# frozen_string_literal: true
require_relative 'client'
require_relative 'search_info'
module FicTracker::Backends::Ao3
class Backend < FicTracker::Backend
# Ordering value attached to tags of each listed category when a story is
# extracted; categories that are not listed get no ordering value.
TAG_ORDERING = { rating: -3, category: -2, warning: -1 }.freeze
# Returns the backend's Client, building it on first use and reusing it
# afterwards.
def client
  return @client if @client

  @client = Client.new
end
# Human-readable name of the site this backend talks to.
#
# @return [String]
def full_name
  'Archive of Our Own'
end
# Base URL of the site, as configured on the Client.
def url
  Client::BASE_URL
end
# Fetches an author's pseud page and stores the scraped metadata on the model.
#
# @param author [FicTracker::Models::Author, String] an author model, or a
#   slug/URL that is run through parse_slug to build a new model
# @return whatever Author#set returns after assigning name, url, image and
#   last_metadata_refresh
def load_author(author)
  unless author.is_a? FicTracker::Models::Author
    author = FicTracker::Models::Author.new(slug: parse_slug(author), backend: self)
  end

  # Log the slug, like the other loaders do, instead of the model object.
  logger.info "Loading author #{author.slug}"

  # Slugs are either "user" or "user/pseud"; without a pseud part the user
  # name doubles as the pseud.
  slug, pseud = author.slug.split('/')
  pseud ||= slug

  doc = client.request("/users/#{slug}/pseuds/#{pseud}")
  user = doc.at_css('#main .user')

  # Strip first so trailing whitespace cannot keep the parenthesised suffix
  # away from the `$` anchor, then strip again to drop the gap it leaves.
  name = user.at_css('h2.heading').text.strip.sub(/\([^)]+\)$/, '').strip
  # Store no name when the heading merely repeats the pseud.
  name = nil if name == pseud

  url = user.at_css('.icon a')
  url = URI.join(Client::BASE_URL, url[:href]) if url
  image = user.at_css('.icon a img')
  image = URI.join(Client::BASE_URL, image[:src]) if image

  author.set(
    name: name,
    url: url&.to_s,
    image: image&.to_s,
    last_metadata_refresh: Time.now
  )
end
# Refreshes a story's metadata from its work page.
#
# @param story [FicTracker::Models::Story, String] a story model, or a
#   slug/URL that is run through parse_slug to build a new model
# @return whatever Story#set returns after assigning the extracted attributes
def load_story(story)
  unless story.is_a? FicTracker::Models::Story
    story = FicTracker::Models::Story.new(slug: parse_slug(story), backend: self)
  end

  logger.info "Loading story #{story.slug}"

  # The page is requested with the view_adult flag set.
  page = client.request("/works/#{story.slug}", query: { view_adult: true })
  metadata = extract_story(page)

  story.set(last_metadata_refresh: Time.now, **metadata)
end
# Reads the chapter listing from a work's "navigate" page and attaches it to
# the story.
#
# @param story [FicTracker::Models::Story, String] a story model, or a
#   slug/URL that is run through parse_slug to build a new model
# @return [FicTracker::Models::Story] the story, with chapters assigned
def find_chapters(story)
  unless story.is_a? FicTracker::Models::Story
    story = FicTracker::Models::Story.new(slug: parse_slug(story), backend: self)
  end

  logger.info "Loading chapters for #{story.slug}"

  listing = client.request("/works/#{story.slug}/navigate")
  entries = listing.at_css('ol.chapter').css('li')

  chapters = entries.map do |item|
    posted = Time.parse(item.at_css('span.datetime').text.strip)
    anchor = item.at_css('a')
    # The link text is split on ". ": the first piece is the chapter number,
    # the remaining pieces are glued back together as the chapter name.
    number, *title_parts = anchor.text.split('. ')
    chapter_url = URI.join(Client::BASE_URL, anchor[:href])

    {
      slug: chapter_url.path.split('/').last,
      index: number.to_i,
      name: title_parts.join('. '),
      url: chapter_url.to_s,
      published_at: posted,
    }
  end

  story.set(last_content_refresh: Time.now)
  story.chapters = chapters
  story
end
# Loads story metadata and every chapter in one request, using the
# full-work view of the work page.
#
# @param story [FicTracker::Models::Story, String] a story model, or a
#   slug/URL that is run through parse_slug to build a new model
# @return [FicTracker::Models::Story] the story, with attributes set and
#   chapters assigned
def load_full_story(story)
  unless story.is_a? FicTracker::Models::Story
    story = FicTracker::Models::Story.new(slug: parse_slug(story), backend: self)
  end

  logger.info "Loading all chapters for #{story.slug}"

  page = client.request("/works/#{story.slug}", query: { view_full_work: true, view_adult: true })
  metadata = extract_story(page)
  chapter_list = page.css('#chapters > div.chapter').map { |node| extract_chapter(node) }

  story.set(last_metadata_refresh: Time.now, last_content_refresh: Time.now, **metadata)
  story.chapters = chapter_list
  story
end
# Fetches a single chapter page and stores its extracted attributes on the
# chapter model.
#
# @param chapter [FicTracker::Models::Chapter, String] a chapter model, or a
#   slug/URL that is run through parse_slug to build a new model
# @param story [FicTracker::Models::Story, String] the owning story; anything
#   that is not a Story model is passed to load_story first
# @return whatever Chapter#set returns
def load_chapter(chapter, story)
  story = load_story(story) unless story.is_a? FicTracker::Models::Story
  unless chapter.is_a? FicTracker::Models::Chapter
    chapter = FicTracker::Models::Chapter.new(slug: parse_slug(chapter), story: story)
  end

  logger.info "Loading chapter #{chapter.slug} for #{story.slug}"

  page = client.request("/works/#{story.slug}/chapters/#{chapter.slug}", query: { view_adult: true })
  chapter_node = page.at_css('#chapters > div.chapter')

  chapter.set(**extract_chapter(chapter_node))
end
# Builds a fresh SearchInfo bound to this backend.
#
# @return [SearchInfo]
def get_search_info
  SearchInfo.new(self)
end
# Converts the given search description via SearchInfo.from_search and
# returns the result; no request is made here.
def search(search_info)
  SearchInfo.from_search(search_info)
end
# Normalises a user-supplied identifier into a slug.
#
# Anything that is not an http(s) URL string is returned as a String via
# to_s. For URLs the slug comes from the path:
# * author URLs (/users/USER, optionally followed by /pseuds/PSEUD and any
#   further sub-page) become "USER", or "USER/PSEUD" when the pseud differs
#   from the user name -- the same slug format load_author splits apart;
# * every other URL yields its last path segment.
#
# @param slug [String, #to_s] a slug or a full URL
# @return [String, nil] the slug; nil for a URL without any path segment
def parse_slug(slug)
  return slug.to_s unless slug.is_a?(String) && slug.start_with?('http')

  path = URI(slug).path

  author_path = %r{\A/users/(?<user>[^/]+)(?:/pseuds/(?<pseud>[^/]+))?}.match(path)
  if author_path
    user = author_path[:user]
    pseud = author_path[:pseud]
    return user if pseud.nil? || pseud == user

    return "#{user}/#{pseud}"
  end

  path.split('/').last
end
private
# Scrapes story-level metadata out of a parsed work page.
#
# @param doc the parsed work page returned by the client (must answer
#   at_css/css)
# @return [Hash] attributes for Story#set: name, authors, synopsis, url,
#   language, chapter_count, word_count, completed, published_at, updated_at
#   and tags
def extract_story(doc)
  # The share link points at "<work url>/share"; dropping the suffix leaves
  # the work's own URL.
  url = URI.join(Client::BASE_URL, doc.at_css('li.share a')[:href].delete_suffix('/share'))

  meta = doc.at_css('#main dl.meta')
  preface = doc.at_css('#workskin .preface')

  name = preface.at_css('.title').text.strip
  # A page without a summary block yields a nil synopsis instead of raising.
  synopsis = preface.at_css('.summary blockquote')&.children&.to_xml&.strip
  language = meta.at_css('dd.language')['lang']

  # The chapters stat is split on "/" into [first, last]; thousands
  # separators are removed the same way as for the word count.
  chapters = meta.at_css('dl.stats dd.chapters').text.strip.delete(',').split('/')
  words = meta.at_css('dl.stats dd.words').text.strip.delete(',').to_i

  published_at = meta.at_css('dl.stats dd.published')&.text&.strip
  published_at = Time.parse(published_at) if published_at
  updated_at = meta.at_css('dl.stats dd.status')&.text&.strip
  updated_at = Time.parse(updated_at) if updated_at

  {
    name: name,
    authors: extract_story_authors(preface),
    synopsis: synopsis,
    url: url.to_s,
    language: language,
    chapter_count: chapters.first.to_i,
    word_count: words,
    # Complete when both sides of the chapters stat are equal.
    completed: chapters.first == chapters.last,
    published_at: published_at,
    updated_at: updated_at,
    tags: FicTracker::Models::Light::Tag.load(extract_story_tags(meta)),
  }
end

# Builds one attribute hash per byline link in the preface.
#
# Living in its own method keeps the per-author name separate from the story
# title; a block-level `name = pseud` inside extract_story would overwrite
# the title local it closes over.
def extract_story_authors(preface)
  preface.css('a[rel="author"]').map do |link|
    segments = link[:href].split('/')
    slug = segments[2]
    pseud = segments.last

    author_name = nil
    if slug != pseud
      # A pseud that differs from the user name is kept as the display name
      # and appended to the slug as "user/pseud".
      author_name = pseud
      slug = "#{slug}/#{pseud}"
    end

    # compact drops the name key when there is no distinct pseud.
    { slug: slug, name: author_name, url: link[:href] }.compact
  end
end

# Collects the tags from every dd.tags block of the meta list, skipping the
# two placeholder archive-warning entries.
def extract_story_tags(meta)
  ignored = ['Creator Chose Not To Use Archive Warnings', 'No Archive Warnings Apply']

  meta.css('dd.tags').flat_map do |tagblock|
    # The first CSS class of the block is used as the tag category.
    category = tagblock[:class].split.first.to_sym

    tagblock.css('a.tag').map do |a|
      tag_name = a.text.strip
      next if ignored.include? tag_name

      {
        name: tag_name,
        category: category,
        important: %i[rating warning category].include?(category) ? true : nil,
        ordering: TAG_ORDERING[category],
      }.compact
    end.compact
  end
end
# Turns one chapter node into the attribute hash stored on a chapter.
#
# @param chapter_doc the chapter's div.chapter node (must answer at_css)
# @return [Hash] index, slug, name, url, last_refresh, content, content_type
#   and etag (SHA1 hex digest of the content)
def extract_chapter(chapter_doc)
  anchor = chapter_doc.at_css('h3.title a')
  chapter_url = URI.join(Client::BASE_URL, anchor[:href])

  # Everything in the article container after its first two child nodes is
  # serialised as the chapter content.
  body_nodes = chapter_doc.at_css('div[role="article"]').children[2..]
  content = body_nodes.map(&:to_xml).join("\n")

  heading = chapter_doc.at_css('h3.title')
  if heading
    # The heading reads "<link text>: <extra>"; prefer the extra part and
    # fall back to the link text when nothing follows it.
    prefix = heading.at_css('a').text.strip
    remainder = heading.text.strip.delete_prefix("#{prefix}:").strip
    heading = remainder.empty? ? prefix : remainder
  end

  {
    # The last whitespace-separated token of the link text is the index.
    index: anchor.text.strip.split.last.to_i,
    slug: chapter_url.path.split('/').last,
    name: heading,
    url: chapter_url.to_s,
    last_refresh: Time.now,
    content: content,
    content_type: 'text/html',
    etag: Digest::SHA1.hexdigest(content),
  }
end
end
end