Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion app/controllers/oral_history_ai_conversation_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ def new
end

# Starts a new AI conversation for the submitted question, then redirects the
# browser to the page for that conversation.
#
# Reads params[:q] (required) as the question, and the optional
# params[:collection_limit], which is handed to the job as search_params.
def create
  # slice first so permit only ever sees this one key. Using permit instead of
  # to_unsafe_h lets a scalar collection_limit through and drops nested hash/array input,
  # so arbitrary request data never reaches the stored search_params.
  search_params = params.slice(:collection_limit).permit(:collection_limit).to_h

  # Launch exactly once; each launch creates a conversation record and enqueues a job.
  conversation = OralHistoryAiConversationJob.launch(
    session_id: session.id,
    question: params.require(:q),
    search_params: search_params
  )

  redirect_to oral_history_ai_conversation_path(conversation.external_id)
end
Expand Down
4 changes: 2 additions & 2 deletions app/jobs/oral_history_ai_conversation_job.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Try to be idempotent!
class OralHistoryAiConversationJob < ApplicationJob
def self.launch(question:, session_id:)
conversation = OralHistory::AiConversation.create!(question: question.strip, session_id: session_id)
def self.launch(question:, session_id:, search_params: {})
conversation = OralHistory::AiConversation.create!(question: question.strip, session_id: session_id, search_params: search_params)
self.perform_later(conversation)

return conversation
Expand Down
22 changes: 21 additions & 1 deletion app/models/oral_history/ai_conversation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def exec_and_record_interaction
self.save!

# Start the conversation, could take 10-20 seconds even.
interactor = OralHistory::ClaudeInteractor.new(question: self.question, question_embedding: self.question_embedding)
interactor = OralHistory::ClaudeInteractor.new(question: self.question, question_embedding: self.question_embedding, limited_start_relation: limited_start_relation)
response = interactor.get_response(conversation_record: self)

self.answer_json = interactor.extract_answer(response)
Expand Down Expand Up @@ -95,4 +95,24 @@ def record_chunks_used(chunks)
}
end
end

# Starting scope of OralHistoryChunk for the chunk search, narrowed according to the
# "collection_limit" entry of search_params. For now only search_params is consulted;
# the logged-in user might also matter later.
#
# Recognized "collection_limit" values:
#   "ohms"         - only chunks whose oral_history_content has a non-blank ohms_xml_text
#   "immediate"    - only chunks whose oral_history_content has available_by_request_mode "off"
#   "upon_request" - available_by_request_mode "off" or "automatic"
# Any other value, a missing value, or a nil search_params means unrestricted, and
# OralHistoryChunk itself is returned.
#
# TODO WARNING: this needs to be checked. There are some odd records with request mode "off"
# because they aren't requestable at all now!
def limited_start_relation
  # search_params is a nullable jsonb column, so a row saved without it holds nil.
  collection_limit = (search_params || {})["collection_limit"]

  case collection_limit
  when "ohms"
    OralHistoryChunk.joins(:oral_history_content).where.not(oral_history_content: { ohms_xml_text: [nil, ""] })
  when "immediate"
    OralHistoryChunk.joins(:oral_history_content).where(oral_history_content: { available_by_request_mode: ["off"] })
  when "upon_request"
    OralHistoryChunk.joins(:oral_history_content).where(oral_history_content: { available_by_request_mode: ["off", "automatic"] })
  else
    # unrestricted, all of them
    OralHistoryChunk
  end
end

end
8 changes: 6 additions & 2 deletions app/services/oral_history/chunk_fetcher.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ class ChunkFetcher
attr_reader :top_k, :question_embedding, :max_per_interview, :oversample_factor
attr_reader :exclude_oral_history_chunk_ids, :exclude_oral_history_content_ids

attr_reader :limited_start_relation


# @param top_k [Integer] how many chunks do you want back
#
Expand All @@ -22,12 +24,14 @@ class ChunkFetcher
#
# @param exclude_interviews [Array<Work,OralHistoryContent,Integer>] Interviews to exclude. can be as Work, OralHistoryContent,
# or OralHistoryContent#id
def initialize(top_k:, question_embedding:, max_per_interview: nil, oversample_factor: 3, exclude_chunks: nil, exclude_interviews: nil)
def initialize(top_k:, question_embedding:, max_per_interview: nil, oversample_factor: 3, exclude_chunks: nil, exclude_interviews: nil, limited_start_relation:)
@top_k = top_k
@question_embedding = question_embedding
@max_per_interview = max_per_interview
@oversample_factor = oversample_factor

@limited_start_relation = limited_start_relation

if exclude_chunks
@exclude_oral_history_chunk_ids = exclude_chunks.collect {|i| i.kind_of?(OralHistoryChunk) ? i.id : i }
end
Expand Down Expand Up @@ -61,7 +65,7 @@ def fetch_chunks
# Without limit count, we'll add that later.
def base_relation
# Preload work, so we can get title or other metadata we might want.
relation = OralHistoryChunk.neighbors_for_embedding(question_embedding).includes(oral_history_content: :work)
relation = limited_start_relation.neighbors_for_embedding(question_embedding).includes(oral_history_content: :work)

# exclude specific chunks?
if exclude_oral_history_chunk_ids.present?
Expand Down
10 changes: 6 additions & 4 deletions app/services/oral_history/claude_interactor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ class ClaudeInteractor
region: ScihistDigicoll::Env.lookup(:aws_region)
)

attr_reader :question, :question_embedding
attr_reader :question, :question_embedding, :limited_start_relation

def initialize(question:, question_embedding:)
# Stores the inputs for one question/answer exchange.
#
# @param question the question to ask
# @param question_embedding embedding for the question; passed on to ChunkFetcher by #get_chunks
# @param limited_start_relation starting scope for the chunk lookup, passed on to ChunkFetcher
#   by #get_chunks. Defaults to OralHistoryChunk.
def initialize(question:, question_embedding:, limited_start_relation: OralHistoryChunk)
  @limited_start_relation = limited_start_relation
  @question_embedding = question_embedding
  @question = question
end

# convenience to look up the embedding
Expand Down Expand Up @@ -107,13 +108,14 @@ def render_user_prompt(chunks)

# Collects the chunks that will be used as context for the question.
#
# Two ChunkFetcher passes are made, both starting from limited_start_relation:
# first the 8 closest-vector chunks, then another 8 with at most one chunk per
# interview, leaving out every interview already represented in the first pass.
#
# @return the first-pass chunks followed by the second-pass chunks
def get_chunks
  # fetch first 8 closest-vector chunks
  chunks = OralHistory::ChunkFetcher.new(
    question_embedding: question_embedding,
    top_k: 8,
    limited_start_relation: limited_start_relation
  ).fetch_chunks

  # now fetch another 8, but only 1-per-interview, not including any interviews from above
  chunks += OralHistory::ChunkFetcher.new(
    question_embedding: question_embedding,
    top_k: 8,
    max_per_interview: 1,
    exclude_interviews: chunks.map(&:oral_history_content_id).uniq,
    limited_start_relation: limited_start_relation
  ).fetch_chunks

  chunks
end
Expand Down
15 changes: 15 additions & 0 deletions app/views/oral_history_ai_conversation/new.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,20 @@
<div class="d-flex justify-content-end">
<%= f.button :submit, "Ask", class: "btn btn-brand-main" %>
</div>

<div>
<%= f.input :collection_limit, as: :radio_buttons,
collection: [
['ohms', '[OHMS-only] Only those with synchronized transcripts'],
['immediate', '[All open(?)] All open for access with no verification'],
['upon_request', '[semi-restricted(?) and up] Also include some that require a form to verify email address'],
['permission_required', "[restricted(?) and up] Also include those that requires permission to access, after filling out a form explaining use"]
],
label_method: ->(item) { item[1].html_safe },
value_method: ->(item) { item[0] },
checked: 'permission_required',
label: "Which Oral Histories to consider in search" %>
</div>

<% end %>
</div>
4 changes: 4 additions & 0 deletions app/views/oral_history_ai_conversation/show.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
<span class="fw-bold">Q:</span> <%= @conversation.question %>
</div>

<div class="alert alert-info mb-4 mt-4">
<%= @conversation.search_params.inspect %>
</div>

<% if [email protected]? %>
<div class="d-flex align-items-center">
<i class="fa fa-circle-o-notch fa-spin fa-2x me-2" aria-hidden="true"></i>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Adds a jsonb search_params column to the oral_history_ai_conversations table.
# No default or NOT NULL constraint is set, so rows without a value read back as nil.
class AddSearchParamsToOhAiConversation < ActiveRecord::Migration[8.0]
  def change
    change_table :oral_history_ai_conversations do |t|
      t.jsonb :search_params
    end
  end
end
3 changes: 2 additions & 1 deletion db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema[8.0].define(version: 2025_12_15_162911) do
ActiveRecord::Schema[8.0].define(version: 2025_12_22_213643) do
# These are extensions that must be enabled in order to support this database
enable_extension "pg_catalog.plpgsql"
enable_extension "vector"
Expand Down Expand Up @@ -245,6 +245,7 @@
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.string "project_source_version"
t.jsonb "search_params"
end

create_table "oral_history_chunks", force: :cascade do |t|
Expand Down
Loading