diff --git a/app/logical/source/extractor/civitai.rb b/app/logical/source/extractor/civitai.rb index 4c4645177..ac85d7a75 100644 --- a/app/logical/source/extractor/civitai.rb +++ b/app/logical/source/extractor/civitai.rb @@ -10,19 +10,25 @@ def match? def image_urls if parsed_url&.full_image_url.present? [parsed_url.full_image_url] - elsif image_uuid.present? - ["https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/#{image_uuid}/original=true"] + elsif image_uuids.present? + image_uuids.map do |uuid| + "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/#{uuid}/original=true" + end else [] end end def page_url - "https://civitai.com/images/#{image_id}" if image_id.present? + if image_id.present? + "https://civitai.com/images/#{image_id}" + elsif post_id.present? + "https://civitai.com/posts/#{post_id}" + end end def artist_name - user_json["username"] + user_json["username"] || api_user["username"] end def profile_url @@ -30,39 +36,52 @@ def profile_url end memoize def html_response - return nil unless page_url.present? - response = http.cache(1.minute).get(page_url) + # XXX Can't use page_url here, because posts require an API call. + # return nil unless image_id.present? + http.cache(1.minute).parsed_get(page_url) || Nokogiri::XML::Document.new + end + + memoize def next_queries + JSON.parse(html_response&.at("#__NEXT_DATA__") || "{}").dig("props", "pageProps", "trpcState", "json", "queries").to_a + end - return nil unless response.status == 200 - response.parse + def post_json + next_queries.dig(0, "state", "data").to_h end - memoize def next_data - JSON.parse(html_response&.at("#__NEXT_DATA__") || "{}").dig("props", "pageProps", "trpcState", "json") || {} + def image_json + # Only on post pages. + next_queries.dig(1, "state", "data").to_h end def image_id parsed_url&.image_id || parsed_referer&.image_id end - def image_uuid - next_data.dig("queries", 0, "state", "data", "url") + def post_id + parsed_url&.post_id || parsed_referer&.post_id end - def image_name - next_data.dig("queries", 0, "state", "data", "name") + memoize def image_uuids + uuids = if image_json.present? + image_json.dig("pages", 0, "items").to_a.map do |item| + item['url'] + end + else + [post_json["url"]].compact + end end - def image_metadata - next_data.dig("queries", 0, "state", "data", "meta").to_h + def user_json + post_json["user"].to_h end - def user_json - next_data.dig("queries", 0, "state", "data", "user").to_h + def artist_commentary_title + post_json["title"] end - def width - next_data.dig("queries", 0, "state", "data", "width") + def artist_commentary_desc + post_json["detail"] end end end diff --git a/app/logical/source/url/civitai.rb b/app/logical/source/url/civitai.rb index 8b525f9f3..0e208a4f2 100644 --- a/app/logical/source/url/civitai.rb +++ b/app/logical/source/url/civitai.rb @@ -2,7 +2,7 @@ module Source class URL::Civitai < Source::URL - attr_reader :image_id, :uuid + attr_reader :image_id, :post_id, :uuid def self.match?(url) url.domain == "civitai.com" @@ -14,7 +14,10 @@ def parse in _, "civitai.com", "images", image_id @image_id = image_id - in ("image" | "images"), "civitai.com", "xG1nkqKTMzGDvpLrqFT7WA", uuid, *rest + in _, "civitai.com", "posts", post_id + @post_id = post_id + + in ("image" | "images" | "imagecache"), "civitai.com", "xG1nkqKTMzGDvpLrqFT7WA", uuid, *rest @uuid = uuid else @@ -31,7 +34,11 @@ def full_image_url end def page_url - "https://civitai.com/images/#{image_id}" if image_id.present? + if image_id.present? + "https://civitai.com/images/#{image_id}" + elsif post_id.present? + "https://civitai.com/posts/#{post_id}" + end end end end