Skip to content

Commit

Permalink
fix template search for csv input
Browse files Browse the repository at this point in the history
  • Loading branch information
YoshitakaMo committed Feb 22, 2024
1 parent 9a8822f commit 06c775a
Showing 1 changed file with 92 additions and 44 deletions.
136 changes: 92 additions & 44 deletions colabfold/mmseqs/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,6 @@ def mmseqs_search_monomer(
subprocess.run([mmseqs] + ["rmdb", base.joinpath("res")])
subprocess.run([mmseqs] + ["rmdb", base.joinpath("res_exp_realign_filter")])

if use_templates:
run_mmseqs(mmseqs, ["search", base.joinpath("prof_res"), dbbase.joinpath(template_db), base.joinpath("res_pdb"),
base.joinpath("tmp2"), "--db-load-mode", str(db_load_mode), "--threads", str(threads), "-s", "7.5", "-a", "-e", "0.1"])
run_mmseqs(mmseqs, ["convertalis", base.joinpath("prof_res"), dbbase.joinpath(f"{template_db}{dbSuffix3}"), base.joinpath("res_pdb"),
base.joinpath(f"{template_db}.m8"), "--format-output",
"query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,cigar",
"--db-load-mode", str(db_load_mode), "--threads", str(threads)])
run_mmseqs(mmseqs, ["rmdb", base.joinpath("res_pdb")])
if use_env:
run_mmseqs(mmseqs, ["search", base.joinpath("prof_res"), dbbase.joinpath(metagenomic_db), base.joinpath("res_env"),
base.joinpath("tmp3"), "--threads", str(threads)] + search_param)
Expand All @@ -139,12 +131,23 @@ def mmseqs_search_monomer(
run_mmseqs(mmseqs, ["rmdb", base.joinpath("res_env_exp")])
run_mmseqs(mmseqs, ["rmdb", base.joinpath("res_env")])

if use_env:
run_mmseqs(mmseqs, ["mergedbs", base.joinpath("qdb"), base.joinpath("final.a3m"), base.joinpath("uniref.a3m"), base.joinpath("bfd.mgnify30.metaeuk30.smag30.a3m")])
run_mmseqs(mmseqs, ["rmdb", base.joinpath("bfd.mgnify30.metaeuk30.smag30.a3m")])
else:
run_mmseqs(mmseqs, ["mvdb", base.joinpath("uniref.a3m"), base.joinpath("final.a3m")])

if use_templates:
run_mmseqs(mmseqs, ["search", base.joinpath("prof_res"), dbbase.joinpath(template_db), base.joinpath("res_pdb"),
base.joinpath("tmp2"), "--db-load-mode", str(db_load_mode), "--threads", str(threads), "-s", "7.5", "-a", "-e", "0.1"])
run_mmseqs(mmseqs, ["convertalis", base.joinpath("prof_res"), dbbase.joinpath(f"{template_db}{dbSuffix3}"), base.joinpath("res_pdb"),
base.joinpath(f"{template_db}"), "--format-output",
"query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,cigar",
"--db-output", "1",
"--db-load-mode", str(db_load_mode), "--threads", str(threads)])
run_mmseqs(mmseqs, ["unpackdb", base.joinpath(f"{template_db}"), base.joinpath("."), "--unpack-name-mode", "0", "--unpack-suffix", ".m8"])
run_mmseqs(mmseqs, ["rmdb", base.joinpath("res_pdb")])
run_mmseqs(mmseqs, ["rmdb", base.joinpath(f"{template_db}")])

run_mmseqs(mmseqs, ["unpackdb", base.joinpath("final.a3m"), base.joinpath("."), "--unpack-name-mode", "0", "--unpack-suffix", ".a3m"])
run_mmseqs(mmseqs, ["rmdb", base.joinpath("final.a3m")])
run_mmseqs(mmseqs, ["rmdb", base.joinpath("uniref.a3m")])
Expand Down Expand Up @@ -238,40 +241,78 @@ def main():
)
# dbs are uniref, templates and environmental
# We normally don't use templates
parser.add_argument("--db1", type=Path, default=Path("uniref30_2302_db"),
help="UniRef database")
parser.add_argument("--db2", type=Path, default=Path(""),
help="Templates database")
parser.add_argument("--db3", type=Path, default=Path("colabfold_envdb_202108_db"),
help="Environmental database")
parser.add_argument(
"--db1", type=Path, default=Path("uniref30_2302_db"), help="UniRef database"
)
parser.add_argument("--db2", type=Path, default=Path(""), help="Templates database")
parser.add_argument(
"--db3",
type=Path,
default=Path("colabfold_envdb_202108_db"),
help="Environmental database",
)

# poor man's boolean arguments
parser.add_argument("--use-env", type=int, default=1, choices=[0, 1],
help="Use --db3")
parser.add_argument("--use-templates", type=int, default=0, choices=[0, 1],
help="Use --db2")
parser.add_argument("--filter", type=int, default=1, choices=[0, 1],
help="Filter the MSA by pre-defined align_eval, qsc, max_accept")
parser.add_argument(
"--use-env", type=int, default=1, choices=[0, 1], help="Use --db3"
)
parser.add_argument(
"--use-templates", type=int, default=0, choices=[0, 1], help="Use --db2"
)
parser.add_argument(
"--filter",
type=int,
default=1,
choices=[0, 1],
help="Filter the MSA by pre-defined align_eval, qsc, max_accept",
)

# mmseqs params
parser.add_argument("--mmseqs", type=Path, default=Path("mmseqs"),
help="Location of the mmseqs binary.")
parser.add_argument("--expand-eval", type=float, default=math.inf,
help="e-val threshold for 'expandaln'.")
parser.add_argument("--align-eval", type=int, default=10,
help="e-val threshold for 'align'.")
parser.add_argument("--diff", type=int, default=3000,
help="filterresult - Keep at least this many seqs in each MSA block.")
parser.add_argument("--qsc", type=float, default=-20.0,
help="filterresult - reduce diversity of output MSAs using min score thresh.")
parser.add_argument("--max-accept", type=int, default=1000000,
help="align - Maximum accepted alignments before alignment calculation for a query is stopped.")
parser.add_argument("--pairing_strategy", type=int, default=0,
help="pairaln - Pairing strategy.")
parser.add_argument("--db-load-mode", type=int, default=0,
help="Database preload mode 0: auto, 1: fread, 2: mmap, 3: mmap+touch")
parser.add_argument("--threads", type=int, default=64,
help="Number of threads to use.")
parser.add_argument(
"--mmseqs",
type=Path,
default=Path("mmseqs"),
help="Location of the mmseqs binary.",
)
parser.add_argument(
"--expand-eval",
type=float,
default=math.inf,
help="e-val threshold for 'expandaln'.",
)
parser.add_argument(
"--align-eval", type=int, default=10, help="e-val threshold for 'align'."
)
parser.add_argument(
"--diff",
type=int,
default=3000,
help="filterresult - Keep at least this many seqs in each MSA block.",
)
parser.add_argument(
"--qsc",
type=float,
default=-20.0,
help="filterresult - reduce diversity of output MSAs using min score thresh.",
)
parser.add_argument(
"--max-accept",
type=int,
default=1000000,
help="align - Maximum accepted alignments before alignment calculation for a query is stopped.",
)
parser.add_argument(
"--pairing_strategy", type=int, default=0, help="pairaln - Pairing strategy."
)
parser.add_argument(
"--db-load-mode",
type=int,
default=0,
help="Database preload mode 0: auto, 1: fread, 2: mmap, 3: mmap+touch",
)
parser.add_argument(
"--threads", type=int, default=64, help="Number of threads to use."
)
args = parser.parse_args()

queries, is_complex = get_queries(args.query, None)
Expand Down Expand Up @@ -383,11 +424,18 @@ def main():
args.base.joinpath(f"{job_number}.a3m"),
args.base.joinpath(f"{safe_filename(raw_jobname)}.a3m"),
)
if args.use_templates:
os.rename(
args.base.joinpath(f"{args.db2}.m8"),
args.base.joinpath(f"{safe_filename(raw_jobname)}_{args.db2}.m8"),
)

if args.use_templates:
id = 0
for raw_jobname, query_sequences, query_seqs_cardinality in queries_unique:
with args.base.joinpath(f"{safe_filename(raw_jobname)}_{args.db2}.m8").open(
"w"
) as f:
for _ in range(len(query_seqs_cardinality)):
with args.base.joinpath(f"{id}.m8").open("r") as g:
f.write(g.read())
os.remove(args.base.joinpath(f"{id}.m8"))
id += 1

query_file.unlink()
run_mmseqs(args.mmseqs, ["rmdb", args.base.joinpath("qdb")])
Expand Down

0 comments on commit 06c775a

Please sign in to comment.