Civitai: Support post pages.

aibooruorg · Jun 17, 2024 · 7eaa307 · 7eaa307
1 parent da4926b
commit 7eaa307
Show file tree

Hide file tree

Showing 2 changed files with 49 additions and 23 deletions.
diff --git a/app/logical/source/extractor/civitai.rb b/app/logical/source/extractor/civitai.rb
@@ -10,59 +10,78 @@ def match?
     # Returns the list of image URLs for this source.
     #
     # When the parsed URL itself provides a full image URL, that single URL is
     # returned. Otherwise one "original=true" URL is built per entry in
     # image_uuids. Returns an empty array when neither is available.
     def image_urls
       if parsed_url&.full_image_url.present?
         [parsed_url.full_image_url]
-      elsif image_uuid.present?
-        ["https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/#{image_uuid}/original=true"]
+      elsif image_uuids.present?
+        image_uuids.map do |uuid|
+          "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/#{uuid}/original=true"
+        end
       else
         []
       end
     end
 
     # Returns the Civitai page URL for this source: the image page when an
     # image id is known, otherwise the post page when a post id is known.
     # Returns nil when neither id is present.
     def page_url
-      "https://civitai.com/images/#{image_id}" if image_id.present?
+      if image_id.present?
+        "https://civitai.com/images/#{image_id}"
+      elsif post_id.present?
+        "https://civitai.com/posts/#{post_id}"
+      end
     end
 
     # Returns the creator's username: the "username" from user_json, falling
     # back to api_user["username"] when that is missing.
     # NOTE(review): api_user is not defined anywhere in the visible part of this
     # diff - confirm it exists, otherwise the fallback raises NameError.
     def artist_name
-      user_json["username"]
+      user_json["username"] || api_user["username"]
     end
 
     # Returns the Civitai user profile URL built from artist_name, or nil when
     # no artist name is available.
     def profile_url
       "https://civitai.com/user/#{artist_name}" if artist_name.present?
     end
 
     memoize def html_response
-      return nil unless page_url.present?
-      response = http.cache(1.minute).get(page_url)
+      # XXX Can't use page_url here, because posts require an API call.
+      # return nil unless image_id.present?
+      http.cache(1.minute).parsed_get(page_url) || Nokogiri::XML::Document.new
+    end
+
     # Returns the array of query entries stored in the page's #__NEXT_DATA__
     # element under props -> pageProps -> trpcState -> json -> queries.
     # Returns an empty array when the element or any key on that path is missing.
     # NOTE(review): `at` returns a node rather than a String; presumably
     # JSON.parse accepts it through implicit string conversion - confirm.
+    memoize def next_queries
+      JSON.parse(html_response&.at("#__NEXT_DATA__") || "{}").dig("props", "pageProps", "trpcState", "json", "queries").to_a
+    end
 
-      return nil unless response.status == 200
-      response.parse
+    def post_json
+      next_queries.dig(0, "state", "data").to_h
     end
 
-    memoize def next_data
-      JSON.parse(html_response&.at("#__NEXT_DATA__") || "{}").dig("props", "pageProps", "trpcState", "json") || {}
+    def image_json
+      # Only on post pages.
+      next_queries.dig(1, "state", "data").to_h
     end
 
     # Returns the image id from the parsed URL, falling back to the parsed
     # referer; nil when neither provides one.
     def image_id
       parsed_url&.image_id || parsed_referer&.image_id
     end
 
-    def image_uuid
-      next_data.dig("queries", 0, "state", "data", "url")
+    def post_id
+      parsed_url&.post_id || parsed_referer&.post_id
     end
 
-    def image_name
-      next_data.dig("queries", 0, "state", "data", "name")
+    memoize def image_uuids
+      uuids = if image_json.present?
+        image_json.dig("pages", 0, "items").to_a.map do |item|
+          item['url']
+        end
+      else
+        [post_json["url"]].compact
+      end
     end
 
-    def image_metadata
-      next_data.dig("queries", 0, "state", "data", "meta").to_h
+    def user_json
+      post_json["user"].to_h
     end
 
-    def user_json
-      next_data.dig("queries", 0, "state", "data", "user").to_h
+    def artist_commentary_title
+      post_json["title"]
     end
 
-    def width
-      next_data.dig("queries", 0, "state", "data", "width")
+    def artist_commentary_desc
+      post_json["detail"]
     end
   end
 end
diff --git a/app/logical/source/url/civitai.rb b/app/logical/source/url/civitai.rb
@@ -2,7 +2,7 @@
 
 module Source
   class URL::Civitai < Source::URL
-    attr_reader :image_id, :uuid
+    attr_reader :image_id, :post_id, :uuid
 
     def self.match?(url)
       url.domain == "civitai.com"
@@ -14,7 +14,10 @@ def parse
       in _, "civitai.com", "images", image_id
         @image_id = image_id
 
-      in ("image" | "images"), "civitai.com", "xG1nkqKTMzGDvpLrqFT7WA", uuid, *rest
+      in _, "civitai.com", "posts", post_id
+        @post_id = post_id
+
+      in ("image" | "images" | "imagecache"), "civitai.com", "xG1nkqKTMzGDvpLrqFT7WA", uuid, *rest
         @uuid = uuid
 
       else
@@ -31,7 +34,11 @@ def full_image_url
     end
 
     # Returns the Civitai page URL for this URL object: the image page when
     # image_id was parsed, otherwise the post page when post_id was parsed.
     # Returns nil when neither is present.
     def page_url
-      "https://civitai.com/images/#{image_id}" if image_id.present?
+      if image_id.present?
+        "https://civitai.com/images/#{image_id}"
+      elsif post_id.present?
+        "https://civitai.com/posts/#{post_id}"
+      end
     end
   end
 end