236 lines
6.8 KiB
Ruby
236 lines
6.8 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require_relative 'client'
|
|
require_relative 'search_info'
|
|
|
|
module FicTracker::Backends::Ao3
|
|
class Backend < FicTracker::Backend
|
|
# Explicit `ordering` values handed to tags of these categories by
# extract_story. Categories that are not listed here get no ordering at all.
TAG_ORDERING = { rating: -3, category: -2, warning: -1 }.freeze
|
|
|
|
# Returns this backend's Client, creating it on the first call and handing
# back the same memoised instance on every later call.
def client
  return @client if @client

  @client = Client.new
end
|
|
|
|
# Human-readable name of the site this backend talks to.
#
# @return [String]
def full_name
  'Archive of Our Own'
end
|
|
|
|
# Base URL of the site, taken straight from the client's BASE_URL constant.
def url
  Client::BASE_URL
end
|
|
|
|
# Fetches an author's pseud page and stores the scraped details on the model.
#
# @param author [FicTracker::Models::Author, Object] an author model, or any
#   value parse_slug can turn into a slug (a new, unsaved model is built
#   around it)
# @return whatever Author#set returns
def load_author(author)
  unless author.is_a? FicTracker::Models::Author
    author = FicTracker::Models::Author.new(slug: parse_slug(author), backend: self)
  end

  # Log the slug, as the story/chapter loaders do, instead of the model object.
  logger.info "Loading author #{author.slug}"

  # The slug is either "user" or "user/pseud"; with no pseud part the user
  # name doubles as the pseud.
  user_slug, pseud = author.slug.split('/')
  pseud ||= user_slug

  doc = client.request("/users/#{user_slug}/pseuds/#{pseud}")
  profile = doc.at_css('#main .user')

  # Strip surrounding whitespace first so the trailing "(...)" suffix really
  # sits at the end of the string, then anchor on \z instead of the
  # per-line `$`.
  display_name = profile.at_css('h2.heading').text.strip.sub(/\([^)]+\)\z/, '').strip
  # A display name identical to the pseud carries no extra information.
  display_name = nil if display_name == pseud

  # Locals are deliberately not called `url`, which is also a method here.
  profile_link = profile.at_css('.icon a')
  profile_url = URI.join(Client::BASE_URL, profile_link[:href]) if profile_link
  icon = profile.at_css('.icon a img')
  icon_url = URI.join(Client::BASE_URL, icon[:src]) if icon

  author.set(
    name: display_name,
    url: profile_url&.to_s,
    image: icon_url&.to_s,
    last_metadata_refresh: Time.now
  )
end
|
|
|
|
# Fetches a work's page and refreshes the story's metadata from it.
#
# @param story [FicTracker::Models::Story, Object] a story model, or any
#   value parse_slug can turn into a slug
# @return whatever Story#set returns
def load_story(story)
  unless story.is_a? FicTracker::Models::Story
    story = FicTracker::Models::Story.new(slug: parse_slug(story), backend: self)
  end

  logger.info "Loading story #{story.slug}"
  page = client.request("/works/#{story.slug}", query: { view_adult: true })

  # Scrape first, then stamp the refresh time.
  metadata = extract_story(page)
  story.set(last_metadata_refresh: Time.now, **metadata)
end
|
|
|
|
# Reads a work's chapter index ("/navigate") and replaces the story's chapter
# list with one entry per listed chapter. Chapter content is not fetched.
#
# @param story [FicTracker::Models::Story, Object] a story model, or any
#   value parse_slug can turn into a slug
# @return [FicTracker::Models::Story] the story that was updated
def find_chapters(story)
  unless story.is_a? FicTracker::Models::Story
    story = FicTracker::Models::Story.new(slug: parse_slug(story), backend: self)
  end

  logger.info "Loading chapters for #{story.slug}"
  index_page = client.request("/works/#{story.slug}/navigate")

  listing = index_page.at_css('ol.chapter').css('li')
  chapters = listing.map do |item|
    posted_at = Time.parse(item.at_css('span.datetime').text.strip)
    anchor = item.at_css('a')

    # Link text looks like "<index>. <name>"; the name itself may contain
    # further ". " separators, so everything after the first part is re-joined.
    parts = anchor.text.split('. ')
    number = parts.first.to_i
    title = parts.drop(1).join('. ')

    chapter_url = URI.join(Client::BASE_URL, anchor[:href])

    {
      slug: chapter_url.path.split('/').last,
      index: number,
      name: title,
      url: chapter_url.to_s,
      published_at: posted_at,
    }
  end

  story.set(last_content_refresh: Time.now)
  story.chapters = chapters

  story
end
|
|
|
|
# Fetches the complete work in a single request (view_full_work) and
# refreshes both the story metadata and every chapter found on the page.
#
# @param story [FicTracker::Models::Story, Object] a story model, or any
#   value parse_slug can turn into a slug
# @return [FicTracker::Models::Story] the story that was updated
def load_full_story(story)
  unless story.is_a? FicTracker::Models::Story
    story = FicTracker::Models::Story.new(slug: parse_slug(story), backend: self)
  end

  logger.info "Loading all chapters for #{story.slug}"
  page = client.request("/works/#{story.slug}", query: { view_full_work: true, view_adult: true })

  metadata = extract_story(page)
  chapter_nodes = page.css('#chapters > div.chapter')
  chapters = chapter_nodes.map { |node| extract_chapter(node) }

  refresh_stamps = { last_metadata_refresh: Time.now, last_content_refresh: Time.now }
  story.set(**refresh_stamps, **metadata)
  story.chapters = chapters

  story
end
|
|
|
|
# Fetches a single chapter page and stores the scraped attributes on the
# chapter model.
#
# @param chapter [FicTracker::Models::Chapter, Object] a chapter model, or
#   any value parse_slug can turn into a slug
# @param story [FicTracker::Models::Story, Object] the owning story; anything
#   that is not already a Story model is first resolved through load_story
# @return whatever Chapter#set returns
def load_chapter(chapter, story)
  story = load_story(story) unless story.is_a? FicTracker::Models::Story
  unless chapter.is_a? FicTracker::Models::Chapter
    chapter = FicTracker::Models::Chapter.new(slug: parse_slug(chapter), story: story)
  end

  logger.info "Loading chapter #{chapter.slug} for #{story.slug}"
  page = client.request("/works/#{story.slug}/chapters/#{chapter.slug}", query: { view_adult: true })

  chapter_node = page.at_css('#chapters > div.chapter')
  chapter.set(**extract_chapter(chapter_node))
end
|
|
|
|
# Builds a fresh SearchInfo bound to this backend.
def get_search_info
  SearchInfo.new(self)
end
|
|
|
|
# Converts the given search description through SearchInfo.from_search and
# returns the result unchanged; nothing else is done with it here.
def search(search_info)
  SearchInfo.from_search(search_info)
end
|
|
|
|
# Normalises a slug-or-URL argument to a bare slug string.
#
# Only absolute http:// or https:// URLs (scheme matched case-insensitively)
# are treated as URLs and reduced to the last segment of their path. Every
# other value is returned through #to_s untouched, so a slug that merely
# begins with the letters "http" (for example "http_fan/pen_name") is no
# longer run through the URL parser and truncated.
#
# @param slug [String, #to_s] a slug or a full URL
# @return [String, nil] the slug; nil only for a URL whose path is empty
def parse_slug(slug)
  if slug.is_a?(String) && slug.match?(%r{\Ahttps?://}i)
    return URI(slug).path.split('/').last
  end

  slug.to_s
end
|
|
|
|
private
|
|
|
|
# Scrapes story-level metadata out of a parsed work page.
#
# @param doc the parsed work page; only #at_css is called on it directly
# @return [Hash] attributes with the keys :name, :authors, :synopsis, :url,
#   :language, :chapter_count, :word_count, :completed, :published_at,
#   :updated_at and :tags
def extract_story(doc)
  # The share link is the work URL with "/share" appended.
  share_href = doc.at_css('li.share a')[:href]
  story_url = URI.join(Client::BASE_URL, share_href.delete_suffix('/share'))
  meta = doc.at_css('#main dl.meta')
  preface = doc.at_css('#workskin .preface')

  title = preface.at_css('.title').text.strip
  # Safe navigation: a page without a `.summary blockquote` yields a nil
  # synopsis instead of raising NoMethodError.
  synopsis = preface.at_css('.summary blockquote')&.children&.to_xml&.strip
  language = meta.at_css('dd.language')['lang']

  authors = preface.css('a[rel="author"]').map do |link|
    href = link[:href]
    segments = href.split('/')
    author_slug = segments[2]
    pseud = segments.last

    # The author's name lives in its own block-scoped variable. Assigning to
    # the story-title local from inside this block would both leak the title
    # into authors without a distinct pseud and overwrite the story's name.
    author_name = nil
    if author_slug != pseud
      author_name = pseud
      author_slug = "#{author_slug}/#{pseud}"
    end

    # compact drops :name when the author has no distinct pseud.
    { slug: author_slug, name: author_name, url: href }.compact
  end

  # Placeholder "warnings" that carry no information are not kept as tags.
  ignored_tags = ['Creator Chose Not To Use Archive Warnings', 'No Archive Warnings Apply']
  tags = meta.css('dd.tags').flat_map do |tagblock|
    # The first CSS class of the <dd> names the tag category.
    category = tagblock[:class].split.first.to_sym

    tagblock.css('a.tag').map do |a|
      label = a.text.strip
      next if ignored_tags.include? label

      {
        name: label,
        category: category,
        important: %i[rating warning category].include?(category) ? true : nil,
        ordering: TAG_ORDERING[category],
      }.compact
    end.compact
  end

  # "written/total" chapter counter; both halves are kept as strings so they
  # can be compared for the completed flag.
  chapters = meta.at_css('dl.stats dd.chapters').text.strip.split('/')
  words = meta.at_css('dl.stats dd.words').text.strip.tr(',', '').to_i
  published_at = meta.at_css('dl.stats dd.published')&.text&.strip
  published_at &&= Time.parse(published_at)
  updated_at = meta.at_css('dl.stats dd.status')&.text&.strip
  updated_at &&= Time.parse(updated_at)

  {
    name: title,
    authors: authors,
    synopsis: synopsis,
    url: story_url.to_s,
    language: language,
    chapter_count: chapters.first.to_i,
    word_count: words,
    completed: chapters.first == chapters.last,
    published_at: published_at,
    updated_at: updated_at,
    tags: FicTracker::Models::Light::Tag.load(tags),
  }
end
|
|
|
|
# Scrapes a single chapter node into an attribute hash.
#
# @param chapter_doc the chapter element (a `div.chapter` node in the callers)
# @return [Hash] attributes with the keys :index, :slug, :name, :url,
#   :last_refresh, :content, :content_type and :etag
def extract_chapter(chapter_doc)
  anchor = chapter_doc.at_css('h3.title a')
  chapter_url = URI.join(Client::BASE_URL, anchor[:href])
  chapter_slug = chapter_url.path.split('/').last
  # The last whitespace-separated word of the link text is used as the index.
  number = anchor.text.strip.split.last.to_i

  # The first two child nodes of the article container are skipped; the rest
  # is serialised as the chapter body.
  article = chapter_doc.at_css('div[role="article"]')
  body = article.children[2..].map(&:to_xml).join("\n")

  heading = chapter_doc.at_css('h3.title')
  chapter_name = heading
  if heading
    link_text = heading.at_css('a').text.strip
    # Whatever follows "<link text>:" in the heading is the custom title.
    custom = heading.text.strip.delete_prefix("#{link_text}:").strip

    chapter_name =
      if custom.empty?
        link_text
      else
        custom
      end
  end

  {
    index: number,
    slug: chapter_slug,

    name: chapter_name,
    url: chapter_url.to_s,
    last_refresh: Time.now,

    content: body,
    content_type: 'text/html',
    etag: Digest::SHA1.hexdigest(body),
  }
end
|
|
end
|
|
end
|