From 0d7213bd1f8140a51a74d8b328a2016e55658520 Mon Sep 17 00:00:00 2001 From: "Alexander \"Ace\" Olofsson" Date: Mon, 27 May 2024 08:46:14 +0200 Subject: [PATCH] Use N-M for author-story relation, fix server --- Gemfile | 2 + config.ru | 10 +- fic_tracker.gemspec | 1 + lib/fic_tracker.rb | 6 +- lib/fic_tracker/backend.rb | 2 +- lib/fic_tracker/backends/ao3/backend.rb | 37 +++-- lib/fic_tracker/config.rb | 6 +- lib/fic_tracker/migrations/001_create_base.rb | 14 +- lib/fic_tracker/models/author.rb | 20 ++- lib/fic_tracker/models/chapter.rb | 14 +- lib/fic_tracker/models/light/tag.rb | 2 +- lib/fic_tracker/models/story.rb | 94 ++++++++++--- lib/fic_tracker/renderers/epub.rb | 9 +- lib/fic_tracker/renderers/html.rb | 19 ++- lib/fic_tracker/server.rb | 128 +++++++++++++----- lib/fic_tracker/util/cache.rb | 2 +- 16 files changed, 279 insertions(+), 87 deletions(-) diff --git a/Gemfile b/Gemfile index 3f8b1cc..8229524 100644 --- a/Gemfile +++ b/Gemfile @@ -10,3 +10,5 @@ gem "rake", "~> 13.0" gem "minitest", "~> 5.16" gem "rubocop", "~> 1.21" + +gem 'irb' diff --git a/config.ru b/config.ru index 732d2ac..6498d4a 100644 --- a/config.ru +++ b/config.ru @@ -1,7 +1,15 @@ # frozen_string_literal: true require 'fic_tracker' +require 'fic_tracker/server' FicTracker.configure! -map '/' ->() { run FicTracker::Server } +map '/health' do + run -> { [200, { 'Content-Type' => 'text/plain' }, ['OK']] } +end + +server = FicTracker::Server.new +map '/' do + run server +end diff --git a/fic_tracker.gemspec b/fic_tracker.gemspec index 9b5b60a..6a0ffc4 100644 --- a/fic_tracker.gemspec +++ b/fic_tracker.gemspec @@ -31,5 +31,6 @@ Gem::Specification.new do |spec| spec.add_dependency 'rubyzip' spec.add_dependency 'sequel' spec.add_dependency 'sinatra' + spec.add_dependency 'sinatra-contrib' spec.add_dependency 'sqlite3' end diff --git a/lib/fic_tracker.rb b/lib/fic_tracker.rb index 4a61063..cd76569 100644 --- a/lib/fic_tracker.rb +++ b/lib/fic_tracker.rb @@ -84,13 +84,15 @@ module FicTracker Epub end + stringio = nil unless attrs[:io] require 'stringio' attrs[:io] = StringIO.new - attrs[:_stringio] = true + stringio = true end - return klass.new(story, **attrs).render unless attrs[:stringio] + result = klass.new(story, **attrs).render + return result unless stringio attrs[:io].string end diff --git a/lib/fic_tracker/backend.rb b/lib/fic_tracker/backend.rb index c0e029e..f4c90b6 100644 --- a/lib/fic_tracker/backend.rb +++ b/lib/fic_tracker/backend.rb @@ -86,7 +86,7 @@ module FicTracker return unless model backend = const_get(model).const_get(:Backend) - backend.new(**FicTracker.config.dig(:backends, backend.config_name, default: {})) + backend.new(**FicTracker::Config.dig(:backends, backend.config_name, default: {})) rescue StandardError => e Logging.logger[backend].error "Failed to load, #{e.class}: #{e}" diff --git a/lib/fic_tracker/backends/ao3/backend.rb b/lib/fic_tracker/backends/ao3/backend.rb index 7a8c389..a344e8c 100644 --- a/lib/fic_tracker/backends/ao3/backend.rb +++ b/lib/fic_tracker/backends/ao3/backend.rb @@ -25,20 +25,26 @@ module FicTracker::Backends::Ao3 def load_author(author) author = FicTracker::Models::Author.new(slug: parse_slug(author), backend: self) unless author.is_a? FicTracker::Models::Author - logger.info "Loading author #{author.slug}" - doc = client.request("/users/#{author.slug}") + logger.info "Loading author #{author}" + slug, pseud = author.slug.split('/') + pseud ||= slug + + doc = client.request("/users/#{slug}/pseuds/#{pseud}") user = doc.at_css('#main .user') + name = user.at_css('h2.heading').text.sub(/\([^)]+\)$/, '').strip + name = nil if name == pseud + url = user.at_css('.icon a') url = URI.join(Client::BASE_URL, url[:href]) if url image = user.at_css('.icon a img') image = URI.join(Client::BASE_URL, image[:src]) if image author.set( - name: user.at_css('h2.heading').text.strip, - url: url.to_s, - image: image.to_s, + name: name, + url: url&.to_s, + image: image&.to_s, last_metadata_refresh: Time.now ) end @@ -131,9 +137,23 @@ module FicTracker::Backends::Ao3 preface = doc.at_css('#workskin .preface') name = preface.at_css('.title').text.strip - synopsis = preface.at_css('.summary blockquote').children.to_xml + synopsis = preface.at_css('.summary blockquote').children.to_xml.strip language = meta.at_css('dd.language')['lang'] - author = preface.at_css('a[rel="author"]')[:href].split('/')[2] + authors = preface.css('a[rel="author"]').map do |aut| + slug = aut[:href].split('/')[2] + pseud = aut[:href].split('/').last + + if slug != pseud + name = pseud + slug = "#{slug}/#{pseud}" + end + + { + slug: slug, + name: name, + url: aut[:href], + }.compact + end tags = meta.css('dd.tags').map do |tagblock| category = tagblock[:class].split.first.to_sym @@ -159,7 +179,7 @@ module FicTracker::Backends::Ao3 { name: name, - author: author, + authors: authors, synopsis: synopsis, url: url.to_s, language: language, @@ -198,6 +218,7 @@ module FicTracker::Backends::Ao3 content: html, content_type: 'text/html', + etag: Digest::SHA1.hexdigest(html), } end end diff --git a/lib/fic_tracker/config.rb b/lib/fic_tracker/config.rb index d5de1bb..3567b5a 100644 --- a/lib/fic_tracker/config.rb +++ b/lib/fic_tracker/config.rb @@ -4,6 +4,7 @@ require 'psych' require_relative 'util/cache' require_relative 'util/database' +require_relative 'util/hash_extensions' module FicTracker::Config class << self @@ -24,6 +25,7 @@ module FicTracker::Config encoding: dig(:cache, :encoding, default: nil), options: dig(:cache, :options, default: {}) ) + FicTracker.logger.level = dig(:log, :level, default: :info) Sequel::Model.db = FicTracker.database end @@ -49,8 +51,10 @@ module FicTracker::Config def load_internal @config_file = ENV['FT_CONFIG_FILE'] if ENV['FT_CONFIG_FILE'] begin + puts "Loading config #{@config_file}" @config = Psych.load(File.read(@config_file)).deep_transform_keys(&:to_sym) - rescue + rescue StandardError => e + puts "Failed to load config #{@config_file}, #{e.class}: #{e}" @config = {} end @config[:database] ||= {} diff --git a/lib/fic_tracker/migrations/001_create_base.rb b/lib/fic_tracker/migrations/001_create_base.rb index 705b465..254b4c8 100644 --- a/lib/fic_tracker/migrations/001_create_base.rb +++ b/lib/fic_tracker/migrations/001_create_base.rb @@ -19,7 +19,7 @@ Sequel.migration do String :backend_name, null: false index %i[slug backend_name], unique: true - String :name, null: false + String :name, null: true, default: nil String :url, null: true, default: nil String :image, null: true, default: nil @@ -30,6 +30,14 @@ Sequel.migration do String :data, null: false, text: true, default: '{}' end + create_table(:authors_stories) do + primary_key :id + + foreign_key :author_id, :authors, on_delete: :cascade, on_update: :cascade + foreign_key :story_id, :stories, on_delete: :cascade, on_update: :cascade + index %i[author_id story_id], unique: true + end + create_table(:chapters) do primary_key :id @@ -76,6 +84,8 @@ Sequel.migration do foreign_key :collection_id, :collections, on_delete: :cascade, on_update: :cascade foreign_key :story_id, :stories, on_delete: :cascade, on_update: :cascade + index %i[collection_id story_id], unique: true + Integer :index, null: true, default: nil end @@ -85,8 +95,6 @@ Sequel.migration do String :backend_name, null: false index %i[slug backend_name], unique: true - foreign_key :author_id, :authors, on_delete: :cascade, on_update: :cascade - String :name, null: false String :synopsis, null: false, text: true String :language, null: true, default: 'en' diff --git a/lib/fic_tracker/models/author.rb b/lib/fic_tracker/models/author.rb index 77d5379..ec03d7a 100644 --- a/lib/fic_tracker/models/author.rb +++ b/lib/fic_tracker/models/author.rb @@ -2,12 +2,12 @@ module FicTracker::Models class Author < Sequel::Model - # 1/week - METADATA_REFRESH_INTERVAL = 7 * 24 * 60 * 60 + # 2/month + METADATA_REFRESH_INTERVAL = 14 * 24 * 60 * 60 plugin :serialization, :json, :data - one_to_many :stories + many_to_many :stories def before_create backend.load_author(self) if last_metadata_refresh.nil? @@ -15,6 +15,11 @@ module FicTracker::Models super end + def self.expire + # TODO: Improve + all.select { |a| a.stories.empty? }.each(&:destroy) + end + def backend return unless backend_name @@ -40,9 +45,14 @@ module FicTracker::Models Time.now - (last_metadata_refresh || Time.at(0)) >= METADATA_REFRESH_INTERVAL end + def self.needing_metadata_refresh + where { Sequel.|(last_metadata_refresh.nil?, last_metadata_refresh < date.function(Time.now - METADATA_REFRESH_INTERVAL, 'localtime')) } + end + def to_s - name || slug + return name if name + + slug end end end - diff --git a/lib/fic_tracker/models/chapter.rb b/lib/fic_tracker/models/chapter.rb index 93375f9..97ef6fb 100644 --- a/lib/fic_tracker/models/chapter.rb +++ b/lib/fic_tracker/models/chapter.rb @@ -6,12 +6,23 @@ module FicTracker::Models class Chapter < Sequel::Model # 3/day CONTENT_REFRESH_INTERVAL = 12 * 60 * 60 - CONTENT_CACHE_TIME = 7 * 24 * 60 * 60 + CONTENT_CACHE_TIME = 14 * 24 * 60 * 60 plugin :serialization, :json, :data many_to_one :story + def after_create + if @content + key = cache_key + [:content] + FicTracker.cache.set(key, @content, CONTENT_CACHE_TIME) + end + if @content_type + key = cache_key + [:content_type] + FicTracker.cache.set(key, @content_type, CONTENT_CACHE_TIME) + end + end + def to_s return "Chapter #{index}" unless name @@ -46,7 +57,6 @@ module FicTracker::Models FicTracker.cache.set(key, content, CONTENT_CACHE_TIME) end @content = content - etag = Digest::SHA1.hexdigest(content) end def content_type? diff --git a/lib/fic_tracker/models/light/tag.rb b/lib/fic_tracker/models/light/tag.rb index bd07bfc..9f33caa 100644 --- a/lib/fic_tracker/models/light/tag.rb +++ b/lib/fic_tracker/models/light/tag.rb @@ -11,7 +11,7 @@ module FicTracker::Models::Light end def self.store(data) - JSON.generate(data) + data = JSON.generate(data) end attr_accessor :id, :name, :category diff --git a/lib/fic_tracker/models/story.rb b/lib/fic_tracker/models/story.rb index 0541a6b..9c07136 100644 --- a/lib/fic_tracker/models/story.rb +++ b/lib/fic_tracker/models/story.rb @@ -8,29 +8,28 @@ module FicTracker::Models METADATA_REFRESH_INTERVAL = 7 * 24 * 60 * 60 # 3/day CONTENT_REFRESH_INTERVAL = 12 * 60 * 60 + # 2 months + STORY_EXPIRY = 2 * 30 * 24 * 60 * 60 - many_to_one :author + many_to_many :authors one_to_many :chapters, order: :index many_to_many :collection, join_table: :collection_stories plugin :serialization, [Light::Tag.method(:store), Light::Tag.method(:load)], :tags plugin :serialization, :json, :data - # Defer creation of author/chapters until story requiring them is to be saved + # Defer creation of authors/chapters until the story requiring them is to be saved def before_create - if @author - @author.save unless @author.id - self.author_id = @author.id - end + @authors.reject(&:id).each(&:save) if @authors super end def after_create - return if [@author, @chapters].all?(&:nil?) + return if [@authors, @chapters].all?(&:nil?) - self.author = @author if @author - @author = nil + self.authors = @authors if @authors + @authors = nil if @chapters latest_chapter_at = self.updated_at || self.published_at || Time.at(0) @@ -43,13 +42,21 @@ module FicTracker::Models @chapters = nil end + def self.expire + where { last_accessed < date.function(Time.now - FicTracker::Models::Story::STORY_EXPIRY, 'localtime') }.destroy + end + def completed? completed end # Support attaching author to a not-yet-saved story + def authors + @authors || super + end + def author - @author || super + authors.first end def author=(author_name) @@ -58,12 +65,44 @@ module FicTracker::Models author ||= backend.load_author(author_name) if id author ||= Author.new(backend: backend, slug: author_name) + self.authors = [author] + end + + def authors=(authors) + to_add = [] + to_remove = self.authors.map(&:id) + + authors.each do |entry| + author = entry if entry.is_a?(FicTracker::Models::Author) + + if author + to_add << author + else + author = self.authors.find { |c| c.slug == entry[:slug] } + + if author + author.set(**entry) + else + entry[:backend_name] = backend.name + author = FicTracker::Models::Author.new(**entry) + to_add << author + end + end + to_remove.delete author.id if author.id + end + if id - @author = nil - author.save unless author.id - super(author) + @authors = nil + to_add.each do |entry| + logger.debug "Adding author #{entry.inspect} to story #{self}" + add_author entry + end + if to_remove.any? + logger.debug "Removing author(s) #{to_remove.inspect} from story #{self}" + author_dataset.where(id: to_remove).destroy + end else - @author = author + @authors = to_add end end @@ -133,7 +172,7 @@ module FicTracker::Models end def etag - chapters.select { |c| c.etag }.last + chapters.select { |c| c.etag }.compact.last&.etag end def cache_key @@ -166,12 +205,35 @@ module FicTracker::Models Time.now - (last_metadata_refresh || Time.at(0)) >= METADATA_REFRESH_INTERVAL end + def self.needing_metadata_refresh + where { Sequel.|(last_metadata_refresh.nil?, last_metadata_refresh < date.function(Time.now - METADATA_REFRESH_INTERVAL, 'localtime')) } + end + def needs_content_refresh? Time.now - (last_content_refresh || Time.at(0)) >= (completed? ? METADATA_REFRESH_INTERVAL : CONTENT_REFRESH_INTERVAL) end + def self.needing_content_refresh + where { Sequel.|(last_content_refresh.nil?, last_content_refresh < date.function(Time.now - (completed? ? METADATA_REFRESH_INTERVAL : CONTENT_REFRESH_INTERVAL), 'localtime')) } + end + def to_s - "#{name}, by #{author.nil? ? '' : author.to_s}" + author_names = authors.map(&:to_s) + if author_names.empty? + author_names = '' + elsif author_names.size == 1 + author_names = author_names.first + else + author_names = author_names.reduce('') do |string, aut| + string = string.dup + string += 'and ' if aut != author_names.first && aut == author_names.last + string += aut + string += ', ' unless aut == author_names.last + string + end + end + + "#{name}, by #{author_names}" end def uid diff --git a/lib/fic_tracker/renderers/epub.rb b/lib/fic_tracker/renderers/epub.rb index e1552fd..5654cd0 100644 --- a/lib/fic_tracker/renderers/epub.rb +++ b/lib/fic_tracker/renderers/epub.rb @@ -79,12 +79,15 @@ module FicTracker::Renderers require_relative '../converters/from_html' xml['dc'].description FicTracker::Converters::FromHTML.to_plain(@story.synopsis) end - xml['dc'].creator story.author&.to_s || '', 'opf:role': 'aut', 'opf:file-as': (story.author&.to_s || 'Unknown') + story.authors.each do |aut| + xml['dc'].creator aut.to_s, 'opf:role': 'aut', 'opf:file-as': aut.to_s + end + xml['dc'].creator '', 'opf:role': 'aut', 'opf:file-as': '' if story.authors.empty? xml['dc'].publisher story.backend.full_name - xml['dc'].date (story.published_at || Time.now).to_datetime + xml['dc'].date (story.published_at || story.updated_at || Time.now).to_datetime xml['dc'].relation story.backend.url xml['dc'].source story.url if story.url - story.tags.each do |tag| + story.tags&.each do |tag| xml['dc'].subject tag.to_s end xml.meta name: 'cover', content: 'coverImage' if cover diff --git a/lib/fic_tracker/renderers/html.rb b/lib/fic_tracker/renderers/html.rb index 516d5bd..be90987 100644 --- a/lib/fic_tracker/renderers/html.rb +++ b/lib/fic_tracker/renderers/html.rb @@ -44,11 +44,16 @@ module FicTracker::Renderers html.address do html << "By " - if story.author&.url - html.a story.author.to_s, href: story.author.url - else - html.i story.author&.to_s || '' + story.authors.each do |author| + html << 'and ' if author != story.authors.first && author == story.authors.last + if author.url + html.a author.to_s, href: author.url + else + html.i author.to_s + end + html << ', ' unless author == story.authors.last end + html.i '' if story.authors.empty? end html.a "On #{story.backend.full_name}.", href: story.url if story.url @@ -64,7 +69,7 @@ module FicTracker::Renderers html.br html.dl do - story.tags.map { |td| FicTracker::Models::Light::Tag.load td }.sort_by(&:ordering).group_by(&:category).each do |category, tags| + story.tags&.map { |td| FicTracker::Models::Light::Tag.load td }.sort_by(&:ordering).group_by(&:category).each do |category, tags| html.dt category.to_s.capitalize html.dd do tags.each do |tag| @@ -121,7 +126,9 @@ module FicTracker::Renderers html.head do html.meta charset: 'utf-8' html.meta viewport: 'width=device-width, initial-scale=1' - html.meta name: 'author', content: story.author.to_s + story.authors.each do |aut| + html.meta name: 'author', content: aut.to_s + end html.meta name: 'generator', content: "FicTracker/#{FicTracker::VERSION}" html.meta name: 'keywords', content: story.tags.map(&:to_s).join(',') diff --git a/lib/fic_tracker/server.rb b/lib/fic_tracker/server.rb index 2eae2d4..d2c7129 100644 --- a/lib/fic_tracker/server.rb +++ b/lib/fic_tracker/server.rb @@ -5,6 +5,12 @@ require 'sinatra/base' module FicTracker # Web server for providing your fic tracking needs class Server < Sinatra::Base + def initialize(*) + @task_runner = Thread.new { background_tasks } + + super + end + configure :development do require 'sinatra/reloader' register Sinatra::Reloader @@ -86,6 +92,71 @@ module FicTracker collection&.save_changes end + head '/story/:backend/*.*' do |_backend_name, slug, format| + mime = nil + case format + when 'epub', :epub + format = :epub + mime = 'application/epub+zip' + when 'html', :html + format = :html + mime = 'text/html' + when 'txt', :txt, 'md', :md + format = :markdown + mime = 'text/markdown' + else + halt 400, "Unknown format #{format}" + end + + story = Models::Story.find(backend_name: backend.name, slug:) + story ||= Models::Story.new(backend_name: backend.name, slug:) + + content_type mime + attachment "#{story.safe_name}.#{format}" + + last_modified story.updated_at || story.published_at + etag story.etag + ensure + story&.save_changes + end + + # rubocop:disable Metrics/BlockLength + get '/story/:backend/*.*' do |_backend_name, slug, format| + mime = nil + case format + when 'epub', :epub + format = :epub + mime = 'application/epub+zip' + when 'html', :html + format = :html + mime = 'text/html' + when 'txt', :txt, 'md', :md + format = :markdown + mime = 'text/markdown' + else + halt 400, "Unknown format #{format}" + end + + story = Models::Story.find(backend_name: backend.name, slug:) + story ||= Models::Story.new(backend_name: backend.name, slug:) + + story.refresh_content + story.refresh_metadata + story.set(last_accessed: Time.now) + + content_type mime + attachment "#{story.safe_name}.#{format}" + + last_modified story.updated_at || story.published_at + etag story.etag + + story.ensure_fully_loaded + FicTracker::Renderers.render(format, story) + ensure + story&.save_changes + end + # rubocop:enable Metrics/BlockLength + get '/story/:backend/:slug', provides: :html do |_backend_name, slug| story = Models::Story.find(backend_name: backend.name, slug:) story ||= Models::Story.new(backend_name: backend.name, slug:) @@ -119,44 +190,27 @@ module FicTracker story&.save_changes end - # rubocop:disable Metrics/BlockLength - get '/story/:backend/:slug.:format' do |_backend_name, slug, format| - render_klass = nil - mime = nil - case format - when 'epub', :epub - render_klass = Renderers::Epub - mime = 'application/epub+zip' - when 'html', :html - render_klass = Renderers::HTML - mime = 'text/html' - when 'txt', :txt, 'md', :md - render_klass = Renderers::Markdown - mime = 'text/markdown' - else - halt 400, "Unknown format #{format}" + private + + def background_tasks + $stderr.puts "Starting background task loop" + loop do + FicTracker::Models::Story.expire + FicTracker::Models::Author.expire + + FicTracker::Models::Story.needing_content_refresh.each(&:refresh_content) + FicTracker::Models::Story.needing_metadata_refresh.each(&:refresh_metadata) + FicTracker::Models::Author.needing_metadata_refresh.each(&:refresh_metadata) + + FicTracker.cache.expire + rescue StandardError => e + FicTracker.logger.error "Failed when running background tasks, #{e.class}: #{e}\n#{e.backtrace[-5,5].join("\n ")}" + ensure + iter += 1 + sleep 30 * 60 end - - story = Models::Story.find(backend_name: backend.name, slug:) - story ||= Models::Story.new(backend_name: backend.name, slug:) - - story.refresh_content - story.refresh_metadata - story.set(last_accessed: Time.now) - - attachment "#{story.safe_name}.#{format}" - content_type mime - - last_modified story.updated_at || story.published_at - etag story.etag - - story.ensure_fully_loaded - stream do |out| - render_klass.new(story, io: out).render - end - ensure - story&.save_changes + rescue StandardError => e + $stderr.puts "Fatal background error: #{e.class}: #{e}" end - # rubocop:enable Metrics/BlockLength end end diff --git a/lib/fic_tracker/util/cache.rb b/lib/fic_tracker/util/cache.rb index e3118b2..de02fc9 100644 --- a/lib/fic_tracker/util/cache.rb +++ b/lib/fic_tracker/util/cache.rb @@ -176,8 +176,8 @@ module FicTracker::Util if (flags & COMPRESSED_FLAG) == COMPRESSED_FLAG require 'zlib' data = Zlib::Inflate.inflate(data) - data.force_encoding('UTF-8') end + data.force_encoding('UTF-8') case (flags & ENCODING_FLAGS) when ENCODING_MARSHAL