-
Notifications
You must be signed in to change notification settings - Fork 1
Gfllm search export #215
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Gfllm search export #215
Changes from 18 commits
aaf6d9a
53f2fef
ffa4e42
7c0b270
ca3ee09
bbb78e2
c40cef0
3e818d6
a4901b7
0efdf8d
f30bead
0c2d944
96d37fd
29dc710
6332d64
cd6e07d
fa07512
b045965
f00a676
acdba3c
fa64872
77583ac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| COPY ( | ||
| SELECT | ||
| json_build_object( | ||
| 'id', todo.id, | ||
| 'urs_taxid', todo.urs_taxid, | ||
| 'should_show_goflow', true | ||
| ) | ||
| FROM search_export_urs todo | ||
| JOIN go_flow_llm_curation_results gfllm | ||
| ON | ||
| todo.urs_taxid = gfllm.urs_taxid | ||
| ORDER by todo.id | ||
| ) TO STDOUT |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| use serde::{ | ||
| Deserialize, | ||
| Serialize, | ||
| }; | ||
| use std::path::Path; | ||
|
|
||
| use anyhow::Result; | ||
| use rnc_core::grouper; | ||
|
|
||
| #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] | ||
| pub struct GoFlowLLMAnnotation { | ||
| pub id: usize, | ||
| urs_taxid: String, | ||
| should_show_goflow: bool, | ||
| } | ||
|
|
||
| impl grouper::HasIndex for GoFlowLLMAnnotation { | ||
| fn index(&self) -> usize { | ||
| self.id | ||
| } | ||
| } | ||
|
|
||
| pub fn group(path: &Path, max: usize, output: &Path) -> Result<()> { | ||
| grouper::group::<GoFlowLLMAnnotation>(grouper::Criteria::AnyNumber, &path, 1, max, &output) | ||
| } | ||
|
|
||
| impl GoFlowLLMAnnotation { | ||
| pub fn should_show_goflow(&self) -> bool { | ||
| self.should_show_goflow | ||
| } | ||
| pub fn urs_taxid(&self) -> &str { | ||
| &self.urs_taxid | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,7 @@ use crate::sequences::{ | |
| qa_status::QaStatus, | ||
| r2dt::R2dt, | ||
| rfam_hit::RfamHit, | ||
| go_flow_annotations::GoFlowLLMAnnotation, | ||
| so_tree, | ||
| }; | ||
|
|
||
|
|
@@ -46,6 +47,7 @@ pub struct Raw { | |
| publication_counts: Option<PublicationCount>, | ||
| litsumm_summaries: Vec<LitsummSummaries>, | ||
| editing_events: Vec<EditingEvent>, | ||
| go_flow_llm_annotations: Vec<GoFlowLLMAnnotation>, | ||
| so_tree: so_tree::SoTree, | ||
| } | ||
|
|
||
|
|
@@ -148,6 +150,11 @@ impl Raw { | |
| &self.editing_events | ||
| } | ||
|
|
||
| /// Get a reference to the raw's GoFlow LLM annotations. | ||
|
||
| pub fn go_flow_llm_annotations(&self) -> &[GoFlowLLMAnnotation] { | ||
| &self.go_flow_llm_annotations | ||
| } | ||
|
|
||
| /// Get this raw's publication count. | ||
| pub fn publication_count(&self) -> usize { | ||
| self.publication_counts.as_ref().map(|p| p.publication_count()).unwrap_or(0) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -65,6 +65,7 @@ process build_metadata { | |
| path(text) | ||
| path(litsumm) | ||
| path(editing_events) | ||
| path(go_flow_annotations) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
||
| path(so_tree) | ||
|
|
||
| output: | ||
|
|
@@ -141,6 +142,20 @@ process litsumm_summaries { | |
| """ | ||
| } | ||
|
|
||
| process go_flow_annotations { | ||
| input: | ||
| val(max_count) | ||
| path (query) | ||
|
|
||
| output: | ||
| path("goflow_annotations.json") | ||
|
|
||
| """ | ||
| psql -v ON_ERROR_STOP=1 -f "$query" "$PGDATABASE" > raw.json | ||
| search-export group go-flow-annotation raw.json ${max_count} goflow_annotations.json | ||
afg1 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| """ | ||
| } | ||
|
|
||
| process editing_events { | ||
| input: | ||
| val(max_count) | ||
|
|
@@ -201,6 +216,7 @@ workflow sequences { | |
| Channel.fromPath('files/search-export/parts/text-mining.sql') | set { text_sql } | ||
| Channel.fromPath('files/search-export/parts/litsumm.sql') | set { litsumm_sql } | ||
| Channel.fromPath('files/search-export/parts/editing-events.sql') | set { editing_events_sql } | ||
| Channel.fromPath('files/search-export/parts/goflow.sql') | set { goflow_sql } | ||
| Channel.fromPath('files/search-export/so-rna-types.sql') | set { so_sql } | ||
|
|
||
| Channel.fromPath('files/search-export/parts/accessions.sql') | set { accessions_sql } | ||
|
|
@@ -230,6 +246,7 @@ workflow sequences { | |
| text_mining_query(search_count, text_sql), | ||
| litsumm_summaries(search_count, litsumm_sql), | ||
| editing_events(search_count, editing_events_sql), | ||
| go_flow_annotations(search_count, goflow_sql), | ||
| fetch_so_tree(so_sql), | ||
| )\ | ||
| | set { metadata } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The key `goflow` used here does not match the field name `go_flow_llm_annotations` in the `Normalized` Rust struct that generates the JSON data. This will result in a `KeyError` when the Python script tries to access the data. To ensure consistency with other fields like `litsumm` and `editing_events`, I've suggested a change in the Rust code to rename the field to `goflow` during serialization. Alternatively, you could change this key to `go_flow_llm_annotations`.