diff --git a/pdbtools/__init__.py b/pdbtools/__init__.py index 1f5d9fee..c9f51aad 100644 --- a/pdbtools/__init__.py +++ b/pdbtools/__init__.py @@ -14,12 +14,110 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# -# -# -# -# Nothing to see here -# Just a placeholder to allow setuptools:entry_points to work. -# -# -# +"""The pdb-tools library. + +A Swiss army knife for manipulating and editing PDB files. + +You can use pdb-tools as a library or as a series of convenient +command-line applications. The complete documentation is available at: + +http://www.bonvinlab.org/pdb-tools/ + +Examples at the command-line +---------------------------- + +$ pdb_fetch 1brs > 1brs.pdb +$ pdb_reres -1 1ctf.pdb > 1ctf_renumbered.pdb +$ pdb_selchain -A,D 1brs.pdb | pdb_delhetatm | pdb_tidy > 1brs_AD_noHET.pdb + + +Examples using pdb-tools as library +----------------------------------- + +You can import according to your needs: + +>>> import pdbtools +>>> from pdbtools import * +>>> from pdbtools import MODULE +>>> from pdbtools import pdb_selchain + +Chain the different functionalities conveniently: + +>>> from pdbtools import pdb_selchain, pdb_selatom, pdb_keepcoord, pdb_reres +>>> with open('dummy.pdb') as fh: +>>> chain_a = pdb_selchain.run(fh, ['A']) +>>> only_N = pdb_selatom.run(chain_a, ['N']) +>>> coords = pdb_keepcoord.run(only_N) +>>> final = pdb_reres.run(coords, 5) +>>> print(''.join(final)) + +The list of MODULEs is specified below. + +All packages have three functions: `check_input`, `main`, and `run`. +The latter executes the logic of each package. `check_input` checks and +prepares potential input parameters to feed `run`. Use `check_input` in +case you are not sure the received input is correct. You can chain both +functions: + +>>> MODULE.run(**MODULE.check_input(*args)) + +If you control the input parameters use `run` directly. 
In general, +`run` functions are generators yielding the modified PDB data +line-by-line. `main` is used solely in the context of the command-line +interface. + +All MODULEs and `run` functions provide comprehensive documentation. + +>>> help(MODULE) +>>> help(MODULE.run) +""" + +__all__ = [ + 'pdb_b', + 'pdb_chainbows', + 'pdb_chain', + 'pdb_chainxseg', + 'pdb_chkensemble', + 'pdb_delchain', + 'pdb_delelem', + 'pdb_delhetatm', + 'pdb_delinsertion', + 'pdb_delresname', + 'pdb_delres', + 'pdb_element', + 'pdb_fetch', + 'pdb_fixinsert', + 'pdb_fromcif', + 'pdb_gap', + 'pdb_head', + 'pdb_intersect', + 'pdb_keepcoord', + 'pdb_merge', + 'pdb_mkensemble', + 'pdb_occ', + 'pdb_reatom', + 'pdb_reres', + 'pdb_rplchain', + 'pdb_rplresname', + 'pdb_seg', + 'pdb_segxchain', + 'pdb_selaltloc', + 'pdb_selatom', + 'pdb_selchain', + 'pdb_selelem', + 'pdb_selhetatm', + 'pdb_selresname', + 'pdb_selres', + 'pdb_selseg', + 'pdb_shiftres', + 'pdb_sort', + 'pdb_splitchain', + 'pdb_splitmodel', + 'pdb_splitseg', + 'pdb_tidy', + 'pdb_tocif', + 'pdb_tofasta', + 'pdb_uniqname', + 'pdb_validate', + 'pdb_wc', + ] diff --git a/pdbtools/pdb_b.py b/pdbtools/pdb_b.py index e809f156..5a2f313a 100644 --- a/pdbtools/pdb_b.py +++ b/pdbtools/pdb_b.py @@ -100,11 +100,11 @@ def check_input(args): sys.stderr.write(emsg.format(option)) sys.exit(1) - return (option, fh) + return (fh, option) def pad_line(line): - """Helper function to pad line to 80 characters in case it is shorter""" + """Pad line to 80 characters in case it is shorter.""" size_of_line = len(line) if size_of_line < 80: padding = 80 - size_of_line + 1 @@ -112,10 +112,24 @@ def pad_line(line): return line[:81] # 80 + newline character -def alter_bfactor(fhandle, bfactor): - """Sets the temperature column in all ATOM/HETATM records to a given value. +def run(fhandle, bfactor): """ + Set the temperature column in all ATOM/HETATM records to a given value. + This function is a generator. 
+ + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + bfactor : float + The desired bfactor. + + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. + """ _pad_line = pad_line records = ('ATOM', 'HETATM') bfactor = "{0:>6.2f}".format(bfactor) @@ -127,13 +141,16 @@ def alter_bfactor(fhandle, bfactor): yield line +alter_bfactor = run + + def main(): # Check Input - bfactor, pdbfh = check_input(sys.argv[1:]) + pdbfh, bfactor = check_input(sys.argv[1:]) # Do the job - new_pdb = alter_bfactor(pdbfh, bfactor) + new_pdb = run(pdbfh, bfactor) # Output results try: diff --git a/pdbtools/pdb_chain.py b/pdbtools/pdb_chain.py index 5ad0aa1b..1c3ec10c 100644 --- a/pdbtools/pdb_chain.py +++ b/pdbtools/pdb_chain.py @@ -98,7 +98,7 @@ def check_input(args): sys.stderr.write(emsg.format(option)) sys.exit(1) - return (option, fh) + return (fh, option) def pad_line(line): @@ -110,8 +110,23 @@ def pad_line(line): return line[:81] # 80 + newline character -def alter_chain(fhandle, chain_id): - """Sets the chain identifier column in all ATOM/HETATM records to a value. +def run(fhandle, chain_id): + """ + Set the chain identifier column in all ATOM/HETATM records to a value. + + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + chain_id : str + The new chain ID. + + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. 
""" _pad_line = pad_line @@ -124,12 +139,15 @@ def alter_chain(fhandle, chain_id): yield line +alter_chain = run + + def main(): # Check Input - chain, pdbfh = check_input(sys.argv[1:]) + pdbfh, chain = check_input(sys.argv[1:]) # Do the job - new_pdb = alter_chain(pdbfh, chain) + new_pdb = run(pdbfh, chain) try: _buffer = [] diff --git a/pdbtools/pdb_chainbows.py b/pdbtools/pdb_chainbows.py index 2cbc66ec..ecf23ae3 100644 --- a/pdbtools/pdb_chainbows.py +++ b/pdbtools/pdb_chainbows.py @@ -75,9 +75,23 @@ def check_input(args): return fh -def set_chain_sequence(fhandle): - """Sets chains sequentially based on existing TER records.""" +def run(fhandle): + """ + Set chains sequentially based on existing TER records. + + Follow sequence [ABC...abc...012...]. + + This function is a generator. + + Parameters + ---------- + fhandle : an iterable giving the PDB file line-by-line + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. + """ chainlist = list( string.digits[::-1] + string.ascii_lowercase[::-1] + string.ascii_uppercase[::-1] ) # 987...zyx...cbaZYX...BCA. @@ -108,12 +122,15 @@ def set_chain_sequence(fhandle): yield line +set_chain_sequence = run + + def main(): # Check Input pdbfh = check_input(sys.argv[1:]) # Do the job - new_pdb = set_chain_sequence(pdbfh) + new_pdb = run(pdbfh) try: _buffer = [] diff --git a/pdbtools/pdb_chainxseg.py b/pdbtools/pdb_chainxseg.py index 8a4fd94f..9520af3d 100644 --- a/pdbtools/pdb_chainxseg.py +++ b/pdbtools/pdb_chainxseg.py @@ -79,8 +79,22 @@ def pad_line(line): return line[:81] # 80 + newline character -def place_chain_on_seg(fhandle): - """Replaces the segment identifier with the contents of the chain identifier. +def run(fhandle): + """ + Replace the segment identifier with the contents of the chain identifier. + + Acts on ATOM/HETATM/ANISOU. + + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. 
+ + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. """ _pad_line = pad_line @@ -94,12 +108,15 @@ def place_chain_on_seg(fhandle): yield line +place_chain_on_seg = run + + def main(): # Check Input pdbfh = check_input(sys.argv[1:]) # Do the job - new_pdb = place_chain_on_seg(pdbfh) + new_pdb = run(pdbfh) try: _buffer = [] diff --git a/pdbtools/pdb_chkensemble.py b/pdbtools/pdb_chkensemble.py index be2770cc..54d2ea06 100644 --- a/pdbtools/pdb_chkensemble.py +++ b/pdbtools/pdb_chkensemble.py @@ -72,13 +72,22 @@ def check_input(args): return fh -def check_ensemble(fhandle): - """Checks if the ensemble is valid. +def run(fhandle): + """ + Check if the ensemble is valid. - Same atoms in each model - Paired MODEL/ENDMDL tags - """ + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + Returns + ------- + int + 1 if an error was found. 0 if no errors are found. + """ model_open = False model_no = None model_data = {} # list of sets @@ -153,6 +162,9 @@ def check_ensemble(fhandle): return 1 +check_ensemble = run + + def main(): # Check Input pdbfh = check_input(sys.argv[1:]) diff --git a/pdbtools/pdb_delchain.py b/pdbtools/pdb_delchain.py index 39c4796b..46232c78 100644 --- a/pdbtools/pdb_delchain.py +++ b/pdbtools/pdb_delchain.py @@ -108,11 +108,27 @@ def check_input(args): sys.stderr.write(__doc__) sys.exit(1) - return (option_set, fh) + return (fh, option_set) -def delete_chain(fhandle, chain_set): - """Removes specific chains from the structure. +def run(fhandle, chain_set): + """ + Remove specific chains from the structure. + + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + chain_set : set, or list, or tuple + The group of chains to remove. Example: ('A', 'B'). + + Yields + ------ + str (line-by-line) + The PDB lines that are not part of the chains selected to be + removed. 
""" records = ('ATOM', 'HETATM', 'TER', 'ANISOU') @@ -123,12 +139,15 @@ def delete_chain(fhandle, chain_set): yield line +delete_chain = run + + def main(): # Check Input - element, pdbfh = check_input(sys.argv[1:]) + pdbfh, element = check_input(sys.argv[1:]) # Do the job - new_pdb = delete_chain(pdbfh, element) + new_pdb = run(pdbfh, element) try: _buffer = [] diff --git a/pdbtools/pdb_delelem.py b/pdbtools/pdb_delelem.py index 66930dd0..f2519468 100644 --- a/pdbtools/pdb_delelem.py +++ b/pdbtools/pdb_delelem.py @@ -110,13 +110,28 @@ def check_input(args): sys.stderr.write(__doc__) sys.exit(1) - return (option_set, fh) + return (fh, option_set) -def delete_elements(fhandle, element_set): - """Removes specific atoms matching the given element(s). +def run(fhandle, element_set): """ + Remove specific atoms matching the given element(s). + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + element_set : set, list, tuple + The elements to remove. + + Yields + ------ + str (line-by-line) + The PDB lines NOT matching the elements set. + Non-RECORDS lines are yielded as are. + """ records = ('ATOM', 'HETATM', 'ANISOU') for line in fhandle: if line.startswith(records): @@ -125,12 +140,15 @@ def delete_elements(fhandle, element_set): yield line +delete_elements = run + + def main(): # Check Input - element_set, pdbfh = check_input(sys.argv[1:]) + pdbfh, element_set = check_input(sys.argv[1:]) # Do the job - new_pdb = delete_elements(pdbfh, element_set) + new_pdb = run(pdbfh, element_set) try: _buffer = [] diff --git a/pdbtools/pdb_delhetatm.py b/pdbtools/pdb_delhetatm.py index 90bba324..82d2a8ee 100644 --- a/pdbtools/pdb_delhetatm.py +++ b/pdbtools/pdb_delhetatm.py @@ -69,8 +69,20 @@ def check_input(args): return fh -def remove_hetatm(fhandle): - """Removes all HETATM and associated records from the PDB file. +def run(fhandle): + """ + Remove all HETATM and associated records from the PDB file. 
+ + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. """ # CONECT 1179 746 1184 1195 1203 @@ -92,12 +104,15 @@ def remove_hetatm(fhandle): yield line +remove_hetatm = run + + def main(): # Check Input pdbfh = check_input(sys.argv[1:]) # Do the job - new_pdb = remove_hetatm(pdbfh) + new_pdb = run(pdbfh) try: _buffer = [] diff --git a/pdbtools/pdb_delres.py b/pdbtools/pdb_delres.py index 754a848b..d4133544 100644 --- a/pdbtools/pdb_delres.py +++ b/pdbtools/pdb_delres.py @@ -172,13 +172,30 @@ def is_integer(string): sys.exit(1) resrange = set(range(start, end + 1)) - return (resrange, step, fh) + return (fh, resrange, step) -def delete_residues(fhandle, residue_range, step): - """Deletes residues within a certain numbering range. +def run(fhandle, residue_range, step): """ + Delete residues within a certain numbering range. + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + residue_range : set, list, or tuple + The residues describing the range. + + step : int + The step at which to delete. + + Yields + ------ + str (line-by-line) + All lines except RECORDS within the residue range. 
+ """ prev_res = None res_counter = -1 records = ('ATOM', 'HETATM', 'TER', 'ANISOU') @@ -196,12 +213,15 @@ def delete_residues(fhandle, residue_range, step): yield line +delete_residues = run + + def main(): # Check Input - resrange, step, pdbfh = check_input(sys.argv[1:]) + pdbfh, resrange, step = check_input(sys.argv[1:]) # Do the job - new_pdb = delete_residues(pdbfh, resrange, step) + new_pdb = run(pdbfh, resrange, step) try: _buffer = [] diff --git a/pdbtools/pdb_delresname.py b/pdbtools/pdb_delresname.py index 2200c9ea..8b9b355d 100644 --- a/pdbtools/pdb_delresname.py +++ b/pdbtools/pdb_delresname.py @@ -109,13 +109,27 @@ def check_input(args): sys.stderr.write(__doc__) sys.exit(1) - return (option_set, fh) + return (fh, option_set) -def delete_residue_by_name(fhandle, resname_set): - """Removes specific residue that match a given name. +def run(fhandle, resname_set): """ + Remove specific residues that match a given name. + Non-coords lines are maintained. + + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + Yields + ------ + str (line-by-line) + The PDB lines not matching the residues selected. + Non-coord lines are yielded as well. 
+ """ records = ('ATOM', 'HETATM', 'ANISOU', 'TER') for line in fhandle: if line.startswith(records): @@ -124,12 +138,15 @@ def delete_residue_by_name(fhandle, resname_set): yield line +delete_residue_by_name = run + + def main(): # Check Input - resname_set, pdbfh = check_input(sys.argv[1:]) + pdbfh, resname_set = check_input(sys.argv[1:]) # Do the job - new_pdb = delete_residue_by_name(pdbfh, resname_set) + new_pdb = run(pdbfh, resname_set) try: _buffer = [] diff --git a/pdbtools/pdb_element.py b/pdbtools/pdb_element.py index d9c4ec8b..7fdecd00 100644 --- a/pdbtools/pdb_element.py +++ b/pdbtools/pdb_element.py @@ -78,15 +78,26 @@ def pad_line(line): return line[:81] # 80 + newline character -def assign_element(fhandle): - """Assigns each atom's element based on the atom name field. +def run(fhandle): + """ + Assign each atom's element based on the atom name field. Rules specified in the format specification: - Alignment of one-letter atom name such as C starts at column 14, while two-letter atom name such as FE starts at column 13. - Atom nomenclature begins with atom type. - """ + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. + """ _pad_line = pad_line elements = set(('H', 'D', 'HE', 'LI', 'BE', 'B', 'C', 'N', 'O', 'F', 'NE', @@ -124,12 +135,15 @@ def assign_element(fhandle): yield line +assign_element = run + + def main(): # Check Input pdbfh = check_input(sys.argv[1:]) # Do the job - new_pdb = assign_element(pdbfh) + new_pdb = run(pdbfh) try: _buffer = [] diff --git a/pdbtools/pdb_fetch.py b/pdbtools/pdb_fetch.py index 41d5f3d3..236f281f 100644 --- a/pdbtools/pdb_fetch.py +++ b/pdbtools/pdb_fetch.py @@ -92,8 +92,24 @@ def check_input(args): return (pdb_code, option) -def fetch_structure(pdbid, biounit=False): - """Downloads the structure in PDB format from the RCSB PDB website. 
+def run(pdbid, biounit=False): + """ + Download the structure in PDB format from the RCSB PDB website. + + This function is a generator. + + Parameters + ---------- + pdbid : str + The alpha-numeric code of the PDB ID. + + biounit : bool + Whether to download biounit version. + + Yields + ------ + str (line-by-line) + The original PDB data. """ base_url = 'https://files.rcsb.org/download/' @@ -127,12 +143,15 @@ def fetch_structure(pdbid, biounit=False): gz_handle.close() +fetch_structure = run + + def main(): # Check Input pdb_code, biounit = check_input(sys.argv[1:]) # Do the job - new_pdb = fetch_structure(pdb_code, biounit) + new_pdb = run(pdb_code, biounit) try: _buffer = [] diff --git a/pdbtools/pdb_fixinsert.py b/pdbtools/pdb_fixinsert.py index d3d1d09f..1b4b394c 100644 --- a/pdbtools/pdb_fixinsert.py +++ b/pdbtools/pdb_fixinsert.py @@ -109,14 +109,31 @@ def check_input(args): fh.close() sys.exit(1) - return (option_list, fh) + return (fh, option_list) -def fix_insertions(fhandle, option_list): - """Deletes insertion codes (at specific residues). +def run(fhandle, option_list): + """ + Delete insertion codes (at specific residues). + + By default, removes ALL insertion codes on ALL residues. Also bumps + the residue numbering of residues downstream of each insertion. + + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. - By default, removes ALL insertion codes on ALL residues. Also bumps the - residue numbering of residues downstream of each insertion. + option_list : list + List of insertion options to act on. + Example ["A9", "B12"]. An empty list performs the default + action. + + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. 
""" option_set = set(option_list) # empty if option_list is empty @@ -172,12 +189,15 @@ def fix_insertions(fhandle, option_list): yield line +fix_insertions = run + + def main(): # Check Input - option_list, pdbfh = check_input(sys.argv[1:]) + pdbfh, option_list = check_input(sys.argv[1:]) # Do the job - new_pdb = fix_insertions(pdbfh, option_list) + new_pdb = run(pdbfh, option_list) try: _buffer = [] diff --git a/pdbtools/pdb_fromcif.py b/pdbtools/pdb_fromcif.py index c1668095..c468bef5 100644 --- a/pdbtools/pdb_fromcif.py +++ b/pdbtools/pdb_fromcif.py @@ -73,10 +73,21 @@ def check_input(args): return fh -def convert_to_pdb(fhandle): - """Converts a structure in mmCIF format to PDB format. +def run(fhandle): """ + Convert a structure in mmCIF format to PDB format. + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + Yields + ------ + str (line-by-line) + New PDB lines. + """ _a = "{:6s}{:5d} {:<4s}{:1s}{:3s} {:1s}{:4d}{:1s} {:8.3f}{:8.3f}{:8.3f}" _a += "{:6.2f}{:6.2f} {:<4s}{:<2s}{:2s}\n" @@ -213,12 +224,15 @@ def convert_to_pdb(fhandle): yield "{:<80s}\n".format("END") +convert_to_pdb = run + + def main(): # Check Input pdbfh = check_input(sys.argv[1:]) # Do the job - new_pdb = convert_to_pdb(pdbfh) + new_pdb = run(pdbfh) try: _buffer = [] diff --git a/pdbtools/pdb_gap.py b/pdbtools/pdb_gap.py index d7b1a049..55db6332 100644 --- a/pdbtools/pdb_gap.py +++ b/pdbtools/pdb_gap.py @@ -72,8 +72,18 @@ def check_input(args): return fh -def detect_gaps(fhandle): - """Detects gaps between residues in the PDB file. +def run(fhandle): + """ + Detect gaps between residues in the PDB file. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + Returns + ------- + None + Writes to the sys.stdout. 
""" fmt_GAPd = "{0[1]}:{0[3]}{0[2]} < {2:7.2f}A > {1[1]}:{1[3]}{1[2]}\n" @@ -123,12 +133,15 @@ def calculate_sq_atom_distance(i, j): sys.stdout.write('Found {} gap(s) in the structure\n'.format(n_gaps)) +detect_gaps = run + + def main(): # Check Input pdbfh = check_input(sys.argv[1:]) # Do the job - detect_gaps(pdbfh) + run(pdbfh) # last line of the script # We can close it even if it is sys.stdin diff --git a/pdbtools/pdb_head.py b/pdbtools/pdb_head.py index bee2cbff..fe8be395 100644 --- a/pdbtools/pdb_head.py +++ b/pdbtools/pdb_head.py @@ -102,11 +102,11 @@ def check_input(args): sys.stderr.write(emsg.format(option)) sys.exit(1) - return (option, fh) + return (fh, option) def pad_line(line): - """Helper function to pad line to 80 characters in case it is shorter""" + """Pad line to 80 characters in case it is shorter.""" size_of_line = len(line) if size_of_line < 80: padding = 80 - size_of_line + 1 @@ -114,10 +114,26 @@ def pad_line(line): return line[:81] # 80 + newline character -def get_first_n_lines(fhandle, num_lines): - """Returns the first N (ATOM/HETATM) lines of the PDB file. +def run(fhandle, num_lines): """ + Filter the first N (ATOM/HETATM) lines of the PDB file. + Non-RECORD lines are ignored. + + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + num_lines : int + The number of lines to yield. + + Yields + ------ + str (line-by-line) + The first N lines (ATOM/HETATM). 
+ """ counter = 0 _pad_line = pad_line @@ -132,12 +148,15 @@ def get_first_n_lines(fhandle, num_lines): break +get_first_n_lines = run + + def main(): # Check Input - chain, pdbfh = check_input(sys.argv[1:]) + pdbfh, chain = check_input(sys.argv[1:]) # Do the job - new_pdb = get_first_n_lines(pdbfh, chain) + new_pdb = run(pdbfh, chain) try: _buffer = [] diff --git a/pdbtools/pdb_intersect.py b/pdbtools/pdb_intersect.py index 9eb7038c..15a9cce0 100644 --- a/pdbtools/pdb_intersect.py +++ b/pdbtools/pdb_intersect.py @@ -68,10 +68,24 @@ def check_input(args): return fl -def intersect_pdb_files(flist): - """Returns atoms common to all input files. +def run(flist): """ + Return atoms common to all input files. + This function is a generator. + + Parameters + ---------- + flist : list of file-obj + The first item is the reference PDB file to which others will + be compared. Items in this list should handle `.close()` + attribute. + + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. + """ atom_data = collections.OrderedDict() # atom_uid: line records = ('ATOM', 'HETATM', 'ANISOU', 'TER') @@ -101,12 +115,15 @@ def intersect_pdb_files(flist): yield atom_data[atom] +intersect_pdb_files = run + + def main(): # Check Input pdbflist = check_input(sys.argv[1:]) # Do the job - new_pdb = intersect_pdb_files(pdbflist) + new_pdb = run(pdbflist) try: _buffer = [] diff --git a/pdbtools/pdb_keepcoord.py b/pdbtools/pdb_keepcoord.py index 6376f00c..99fd61f4 100644 --- a/pdbtools/pdb_keepcoord.py +++ b/pdbtools/pdb_keepcoord.py @@ -71,8 +71,20 @@ def check_input(args): return fh -def keep_coordinates(fhandle): - """Keeps only coordinate records in the PDB file. +def run(fhandle): + """ + Keep only coordinate records in the PDB file. + + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + Yields + ------ + str (line-by-line) + Only the coordinate records in the PDB file. 
""" records = ('MODEL ', 'ATOM ', 'HETATM', @@ -83,12 +95,15 @@ def keep_coordinates(fhandle): yield line +keep_coordinates = run + + def main(): # Check Input pdbfh = check_input(sys.argv[1:]) # Do the job - new_pdb = keep_coordinates(pdbfh) + new_pdb = run(pdbfh) try: _buffer = [] diff --git a/pdbtools/pdb_merge.py b/pdbtools/pdb_merge.py index 278d51a3..3659df8f 100644 --- a/pdbtools/pdb_merge.py +++ b/pdbtools/pdb_merge.py @@ -66,8 +66,19 @@ def check_input(args): return fl -def concatenate_files(flist): - """Iterates over a list of files and yields each line sequentially. +def run(flist): + """ + Iterate over a list of files and yields each line sequentially. + + Parameters + ---------- + flist : list of file-like objects + Must handle `.close()` attribute. + + Yields + ------ + str (line-by-line) + Lines from the concatenated PDB files. """ for fhandle in flist: @@ -76,12 +87,15 @@ def concatenate_files(flist): fhandle.close() +concatenate_files = run + + def main(): # Check Input pdbfh = check_input(sys.argv[1:]) # Do the job - new_pdb = concatenate_files(pdbfh) + new_pdb = run(pdbfh) try: _buffer = [] diff --git a/pdbtools/pdb_mkensemble.py b/pdbtools/pdb_mkensemble.py index a02b3f95..6e7fff49 100644 --- a/pdbtools/pdb_mkensemble.py +++ b/pdbtools/pdb_mkensemble.py @@ -70,11 +70,22 @@ def pad_line(line): return line[:81] # 80 + newline character -def make_ensemble(f_name_list): - """ - Combines several PDB files into a multi-model ensemble file. +def run(f_name_list): """ + Combine several PDB files into a multi-model ensemble file. + + This function is a generator. + + Parameters + ---------- + f_name_list : list + List of paths to PDB files. + Yields + ------ + str (line-by-line) + The new ensemble PDB file. 
+ """ _pad_line = pad_line # REMARK THIS ENTRY @@ -112,12 +123,15 @@ def make_ensemble(f_name_list): yield 'END\n' +make_ensemble = run + + def main(): # Check Input pdbfile_list = check_input(sys.argv[1:]) # Do the job - new_pdb = make_ensemble(pdbfile_list) + new_pdb = run(pdbfile_list) try: _buffer = [] diff --git a/pdbtools/pdb_occ.py b/pdbtools/pdb_occ.py index a73c3866..048f119c 100644 --- a/pdbtools/pdb_occ.py +++ b/pdbtools/pdb_occ.py @@ -100,11 +100,26 @@ def check_input(args): sys.stderr.write(emsg.format(option)) sys.exit(1) - return (option, fh) + return (fh, option) -def alter_occupancy(fhandle, occupancy): - """Sets the occupancy column in all ATOM/HETATM records to a given value. +def run(fhandle, occupancy): + """ + Set the occupancy column in all ATOM/HETATM records to a given value. + + Non-ATOM/HETATM lines are yielded as is. This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + occupancy : float + The desired occupancy value. + + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. """ records = ('ATOM', 'HETATM') @@ -116,12 +131,15 @@ def alter_occupancy(fhandle, occupancy): yield line +alter_occupancy = run + + def main(): # Check Input - occupancy, pdbfh = check_input(sys.argv[1:]) + pdbfh, occupancy = check_input(sys.argv[1:]) # Do the job - new_pdb = alter_occupancy(pdbfh, occupancy) + new_pdb = run(pdbfh, occupancy) # Output results try: diff --git a/pdbtools/pdb_reatom.py b/pdbtools/pdb_reatom.py index 62683319..f8d46729 100644 --- a/pdbtools/pdb_reatom.py +++ b/pdbtools/pdb_reatom.py @@ -101,11 +101,23 @@ def check_input(args): sys.stderr.write(emsg.format(option)) sys.exit(1) - return (option, fh) + return (fh, option) -def renumber_atom_serials(fhandle, starting_value): - """Resets the atom serial number column to start from a specific number. 
+def run(fhandle, starting_value): + """ + Reset the atom serial number column to start from a specific number. + + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. """ # CONECT 1179 746 1184 1195 1203 @@ -154,12 +166,15 @@ def renumber_atom_serials(fhandle, starting_value): yield line +renumber_atom_serials = run + + def main(): # Check Input - starting_resid, pdbfh = check_input(sys.argv[1:]) + pdbfh, starting_resid = check_input(sys.argv[1:]) # Do the job - new_pdb = renumber_atom_serials(pdbfh, starting_resid) + new_pdb = run(pdbfh, starting_resid) # Output results try: diff --git a/pdbtools/pdb_reres.py b/pdbtools/pdb_reres.py index b7309bb8..7f10b8f1 100644 --- a/pdbtools/pdb_reres.py +++ b/pdbtools/pdb_reres.py @@ -101,7 +101,7 @@ def check_input(args): sys.stderr.write(emsg.format(option)) sys.exit(1) - return (option, fh) + return (fh, option) def pad_line(line): @@ -113,8 +113,23 @@ def pad_line(line): return line[:81] # 80 + newline character -def renumber_residues(fhandle, starting_resid): - """Resets the residue number column to start from a specific number. +def run(fhandle, starting_resid): + """ + Reset the residue number column to start from a specific number. + + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + starting_resid : int + The starting residue number. + + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. 
""" _pad_line = pad_line prev_resid = None # tracks chain and resid @@ -143,12 +158,15 @@ def renumber_residues(fhandle, starting_resid): yield line +renumber_residues = run + + def main(): # Check Input - starting_resid, pdbfh = check_input(sys.argv[1:]) + pdbfh, starting_resid = check_input(sys.argv[1:]) # Do the job - new_pdb = renumber_residues(pdbfh, starting_resid) + new_pdb = run(pdbfh, starting_resid) # Output results try: diff --git a/pdbtools/pdb_rplchain.py b/pdbtools/pdb_rplchain.py index a2b228e2..e564fd5b 100644 --- a/pdbtools/pdb_rplchain.py +++ b/pdbtools/pdb_rplchain.py @@ -111,13 +111,28 @@ def check_input(args): if chain_to == '': chain_to = ' ' - return ((chain_from, chain_to), fh) + return (fh, (chain_from, chain_to)) -def replace_chain_identifiers(fhandle, chain_ids): - """Replaces one chain identifier by another in the PDB file. +def run(fhandle, chain_ids): """ + Replace one chain identifier by another in the PDB file. + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + chain_ids : tuple + Two element tuple, where first item is the original chain id + and the second element is the modified chain id. + + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. 
+ """ chain_from, chain_to = chain_ids records = ('ATOM', 'HETATM', 'TER', 'ANISOU') @@ -129,12 +144,15 @@ def replace_chain_identifiers(fhandle, chain_ids): yield line +replace_chain_identifiers = run + + def main(): # Check Input - chain_ids, pdbfh = check_input(sys.argv[1:]) + pdbfh, chain_ids = check_input(sys.argv[1:]) # Do the job - new_pdb = replace_chain_identifiers(pdbfh, chain_ids) + new_pdb = run(pdbfh, chain_ids) try: _buffer = [] diff --git a/pdbtools/pdb_rplresname.py b/pdbtools/pdb_rplresname.py index 8092ad49..b64f52ec 100644 --- a/pdbtools/pdb_rplresname.py +++ b/pdbtools/pdb_rplresname.py @@ -101,11 +101,29 @@ def check_input(args): sys.stderr.write(emsg.format(name_to)) sys.exit(1) - return (name_from, name_to, fh) + return (fh, name_from, name_to) -def rename_residues(fhandle, name_from, name_to): - """Changes the residue name of residues matching a pattern to another. +def run(fhandle, name_from, name_to): + """ + Change the residue name of residues matching a pattern to another. + + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + name_from : str + The original name of the residue to change. + + name_to : str + The name to change to. + + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. 
""" records = ('ATOM', 'HETATM', 'TER', 'ANISOU') @@ -118,12 +136,15 @@ def rename_residues(fhandle, name_from, name_to): yield line +rename_residues = run + + def main(): # Check Input - name_from, name_to, pdbfh = check_input(sys.argv[1:]) + pdbfh, name_from, name_to = check_input(sys.argv[1:]) # Do the job - new_pdb = rename_residues(pdbfh, name_from, name_to) + new_pdb = run(pdbfh, name_from, name_to) # Output results try: diff --git a/pdbtools/pdb_seg.py b/pdbtools/pdb_seg.py index 1ec715f6..4a4e8e2f 100644 --- a/pdbtools/pdb_seg.py +++ b/pdbtools/pdb_seg.py @@ -98,7 +98,7 @@ def check_input(args): sys.stderr.write(emsg.format(option)) sys.exit(1) - return (option, fh) + return (fh, option) def pad_line(line): @@ -110,10 +110,24 @@ def pad_line(line): return line[:81] # 80 + newline character -def alter_segid(fhandle, segment_id): - """Sets the segment identifier column in all ATOM/HETATM records to a value. +def run(fhandle, segment_id): """ + Set the segment identifier column in all ATOM/HETATM records to a value. + This function is a generator. + + Parameters + ---------- + fhandle : an iterable giving the PDB file line-by-line. + + segment_id : str + The new segment ID. + + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. 
+ """ _pad_line = pad_line records = ('ATOM', 'HETATM') for line in fhandle: @@ -124,12 +138,15 @@ def alter_segid(fhandle, segment_id): yield line +alter_segid = run + + def main(): # Check Input - segment_id, pdbfh = check_input(sys.argv[1:]) + pdbfh, segment_id = check_input(sys.argv[1:]) # Do the job - new_pdb = alter_segid(pdbfh, segment_id) + new_pdb = run(pdbfh, segment_id) try: _buffer = [] diff --git a/pdbtools/pdb_segxchain.py b/pdbtools/pdb_segxchain.py index 5175f520..5483717b 100644 --- a/pdbtools/pdb_segxchain.py +++ b/pdbtools/pdb_segxchain.py @@ -82,10 +82,22 @@ def pad_line(line): return line[:81] # 80 + newline character -def place_seg_on_chain(fhandle): - """Replaces the chain identifier with the contents of the segment identifier. +def run(fhandle): + """ + Replace the chain identifier with the contents of the segment identifier. Truncates the segment identifier to its first character. + + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. """ prev_line = None @@ -108,12 +120,15 @@ def place_seg_on_chain(fhandle): yield line +place_seg_on_chain = run + + def main(): # Check Input pdbfh = check_input(sys.argv[1:]) # Do the job - new_pdb = place_seg_on_chain(pdbfh) + new_pdb = run(pdbfh) try: _buffer = [] diff --git a/pdbtools/pdb_selaltloc.py b/pdbtools/pdb_selaltloc.py index 959403e5..46adf68e 100644 --- a/pdbtools/pdb_selaltloc.py +++ b/pdbtools/pdb_selaltloc.py @@ -109,13 +109,24 @@ def check_input(args): sys.stderr.write(emsg.format(option)) sys.exit(1) - return (option, fh) + return (fh, option) def select_by_occupancy(fhandle): - """Picks the altloc with the highest occupancy. """ + Pick the altloc with the highest occupancy. + This function is a generator. + + Parameters + ---------- + fhandle : an iterable giving the PDB file line-by-line. 
+ + Yields + ------ + str (line-by-line) + The PDB file with altlocs of highest occupancy only. + """ atom_prop = {} atom_prop_setd = atom_prop.setdefault atom_data = [] @@ -174,14 +185,25 @@ def select_by_occupancy(fhandle): def select_by_altloc(fhandle, selloc): - """Picks one altloc when atoms have more than one. - - If the specified altloc (selloc) is not present for this particular atom, - outputs all altlocs. For instance, if atom X has altlocs A and B but the - user picked C, we return A and B anyway. If atom Y has altlocs A, B, and C, - then we only return C. """ + Pick one altloc when atoms have more than one. + + If the specified altloc (selloc) is not present for this particular + atom, outputs all altlocs. For instance, if atom X has altlocs A and + B but the user picked C, we return A and B anyway. If atom Y has + altlocs A, B, and C, then we only return C. + This function is a generator. + + Parameters + ---------- + fhandle : an iterable giving the PDB file line-by-line. + + Yields + ------ + str (line-by-line) + The PDB file with altlocs according to selection. + """ # We have to iterate multiple times atom_prop = {} atom_prop_setd = atom_prop.setdefault @@ -232,15 +254,35 @@ def select_by_altloc(fhandle, selloc): yield line +def run(fhandle, option=None): + """ + Selects altloc labels for the entire PDB file. + + Parameters + ---------- + fhandle : an iterable giving PDB file line-by-line. + + Returns + ------- + generator + If option is None, return `select_by_occupancy` generator. + If option is given, return `select_by_altloc` generator. + See `pdb_selaltloc.select_by_occupancy` and + `pdb_selaltloc.select_by_altloc` for more details. 
+ """ + if option is None: + return select_by_occupancy(fhandle) + + else: + return select_by_altloc(fhandle, option) + + def main(): # Check Input - option, pdbfh = check_input(sys.argv[1:]) + pdbfh, option = check_input(sys.argv[1:]) # Do the job - if option is None: - new_pdb = select_by_occupancy(pdbfh) - else: - new_pdb = select_by_altloc(pdbfh, option) + new_pdb = run(pdbfh, option) try: _buffer = [] diff --git a/pdbtools/pdb_selatom.py b/pdbtools/pdb_selatom.py index 7307bc8c..3ecbe978 100644 --- a/pdbtools/pdb_selatom.py +++ b/pdbtools/pdb_selatom.py @@ -110,13 +110,28 @@ def check_input(args): sys.stderr.write(__doc__) sys.exit(1) - return (option_set, fh) + return (fh, option_set) -def filter_atoms(fhandle, atomname_set): - """Removes specific atoms that do not match a given atom name. +def run(fhandle, atomname_set): """ + Filter to selected atoms. + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + atomname_set : set, list, or tuple + The names of the desired atoms. + + Yields + ------ + str (line-by-line) + All non-RECORD lines and RECORD lines within the selected atom + names. 
+ """ records = ('ATOM', 'HETATM', 'ANISOU') for line in fhandle: if line.startswith(records): @@ -125,12 +140,15 @@ def filter_atoms(fhandle, atomname_set): yield line +filter_atoms = run + + def main(): # Check Input - atomname_set, pdbfh = check_input(sys.argv[1:]) + pdbfh, atomname_set = check_input(sys.argv[1:]) # Do the job - new_pdb = filter_atoms(pdbfh, atomname_set) + new_pdb = run(pdbfh, atomname_set) try: _buffer = [] diff --git a/pdbtools/pdb_selchain.py b/pdbtools/pdb_selchain.py index 949e0bdd..7bda4c86 100644 --- a/pdbtools/pdb_selchain.py +++ b/pdbtools/pdb_selchain.py @@ -108,13 +108,28 @@ def check_input(args): sys.stderr.write(__doc__) sys.exit(1) - return (option_set, fh) + return (fh, option_set) -def select_chain(fhandle, chain_set): - """Filters the PDB file for specific chain identifiers. +def run(fhandle, chain_set): """ + Filter the PDB file for specific chain identifiers. + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + chain_set : set, or list, or tuple + The group of chains to keep. + Example: ('A', 'B'), keeps only atoms from chains A and B. + + Yields + ------ + str (line-by-line) + The PDB lines for those matching the selected chains. 
+ """ records = ('ATOM', 'HETATM', 'TER', 'ANISOU') for line in fhandle: if line.startswith(records): @@ -123,12 +138,15 @@ def select_chain(fhandle, chain_set): yield line +select_chain = run + + def main(): # Check Input - chain, pdbfh = check_input(sys.argv[1:]) + pdbfh, chain = check_input(sys.argv[1:]) # Do the job - new_pdb = select_chain(pdbfh, chain) + new_pdb = run(pdbfh, chain) try: _buffer = [] diff --git a/pdbtools/pdb_selelem.py b/pdbtools/pdb_selelem.py index 01a15011..0f599e22 100644 --- a/pdbtools/pdb_selelem.py +++ b/pdbtools/pdb_selelem.py @@ -110,13 +110,28 @@ def check_input(args): sys.stderr.write(__doc__) sys.exit(1) - return (option_set, fh) + return (fh, option_set) -def delete_elements(fhandle, element_set): - """Removes specific atoms matching the given element(s). +def run(fhandle, element_set): """ + Remove specific atoms matching the given element(s). + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + element_set : set, or list, or tuple + The group of elements to remove. + Example: ('N', 'C'), removes nitrogens and carbons. + + Yields + ------ + str (line-by-line) + The PDB lines except for those matching the elements to remove. + """ records = ('ATOM', 'HETATM', 'ANISOU') for line in fhandle: if line.startswith(records): @@ -125,12 +140,15 @@ def delete_elements(fhandle, element_set): yield line +delete_elements = run + + def main(): # Check Input - element_set, pdbfh = check_input(sys.argv[1:]) + pdbfh, element_set = check_input(sys.argv[1:]) # Do the job - new_pdb = delete_elements(pdbfh, element_set) + new_pdb = run(pdbfh, element_set) try: _buffer = [] diff --git a/pdbtools/pdb_selhetatm.py b/pdbtools/pdb_selhetatm.py index 35ec5b09..033d7184 100644 --- a/pdbtools/pdb_selhetatm.py +++ b/pdbtools/pdb_selhetatm.py @@ -69,10 +69,21 @@ def check_input(args): return fh -def select_hetatm(fhandle): - """Selects all HETATM and associated records from the PDB file. 
+def run(fhandle): """ + Select all HETATM and associated records from the PDB file. + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + Yields + ------ + str (line-by-line) + The HETATM lines. + """ # CONECT 1179 746 1184 1195 1203 char_ranges = (slice(6, 11), slice(11, 16), slice(16, 21), slice(21, 26), slice(26, 31)) @@ -90,12 +101,15 @@ def select_hetatm(fhandle): yield line +select_hetatm = run + + def main(): # Check Input pdbfh = check_input(sys.argv[1:]) # Do the job - new_pdb = select_hetatm(pdbfh) + new_pdb = run(pdbfh) try: _buffer = [] diff --git a/pdbtools/pdb_selres.py b/pdbtools/pdb_selres.py index 9d0988eb..81f08013 100644 --- a/pdbtools/pdb_selres.py +++ b/pdbtools/pdb_selres.py @@ -203,13 +203,28 @@ def _validate_opt_range(value, resid_list): singleres = _validate_opt_numeric(entry) residue_range.add(singleres) - return (residue_range, fh) + return (fh, residue_range) -def select_residues(fhandle, residue_range): - """Outputs residues within a certain numbering range. +def run(fhandle, residue_range): """ + Filter residues within a certain numbering range. + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + residue_range : list of ints + The residues to consider. Residues outside this range are + not yielded. + + Yields + ------ + str (line-by-line) + All non-RECORDS lines plus RECORDS within the residue range. 
+ """ prev_res = None records = ('ATOM', 'HETATM', 'TER', 'ANISOU') for line in fhandle: @@ -225,12 +240,15 @@ def select_residues(fhandle, residue_range): yield line +select_residues = run + + def main(): # Check Input - resrange, pdbfh = check_input(sys.argv[1:]) + pdbfh, resrange = check_input(sys.argv[1:]) # Do the job - new_pdb = select_residues(pdbfh, resrange) + new_pdb = run(pdbfh, resrange) try: _buffer = [] diff --git a/pdbtools/pdb_selresname.py b/pdbtools/pdb_selresname.py index 62ef277c..89bfee54 100644 --- a/pdbtools/pdb_selresname.py +++ b/pdbtools/pdb_selresname.py @@ -109,13 +109,30 @@ def check_input(args): sys.stderr.write(__doc__) sys.exit(1) - return (option_set, fh) + return (fh, option_set) -def filter_residue_by_name(fhandle, resname_set): - """Removes specific residue that do not match a given name. +def run(fhandle, resname_set): """ + Keep specified residue names, remove all others. + Non-coords lines are maintained. + + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + resname_set : set, list, or tuple + The name of the residues to keep. + + Yields + ------ + str (line-by-line) + The PDB lines for the residues selected. + Non-coord lines are yielded as well. 
+ """ records = ('ATOM', 'HETATM', 'ANISOU', 'TER') for line in fhandle: if line.startswith(records): @@ -124,12 +141,15 @@ def filter_residue_by_name(fhandle, resname_set): yield line +filter_residue_by_name = run + + def main(): # Check Input - resname_set, pdbfh = check_input(sys.argv[1:]) + pdbfh, resname_set = check_input(sys.argv[1:]) # Do the job - new_pdb = filter_residue_by_name(pdbfh, resname_set) + new_pdb = run(pdbfh, resname_set) try: _buffer = [] diff --git a/pdbtools/pdb_selseg.py b/pdbtools/pdb_selseg.py index 02fd4198..006f43c5 100644 --- a/pdbtools/pdb_selseg.py +++ b/pdbtools/pdb_selseg.py @@ -108,13 +108,27 @@ def check_input(args): sys.stderr.write(__doc__) sys.exit(1) - return (option_set, fh) + return (fh, option_set) -def select_segment_id(fhandle, segment_set): - """Filters the PDB file for specific segment identifiers. +def run(fhandle, segment_set): """ + Filter the PDB file for specific segment identifiers. + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + segment_set : set, list, or tuple + The set of segment identifiers. + + Yields + ------ + str (line-by-line) + The lines only from the segment set. 
+ """ records = ('ATOM', 'HETATM', 'ANISOU') for line in fhandle: if line.startswith(records): @@ -123,12 +137,15 @@ def select_segment_id(fhandle, segment_set): yield line +select_segment_id = run + + def main(): # Check Input - segment_set, pdbfh = check_input(sys.argv[1:]) + pdbfh, segment_set = check_input(sys.argv[1:]) # Do the job - new_pdb = select_segment_id(pdbfh, segment_set) + new_pdb = run(pdbfh, segment_set) try: _buffer = [] diff --git a/pdbtools/pdb_shiftres.py b/pdbtools/pdb_shiftres.py index 81d8005d..0e5af483 100644 --- a/pdbtools/pdb_shiftres.py +++ b/pdbtools/pdb_shiftres.py @@ -101,14 +101,29 @@ def check_input(args): sys.stderr.write(emsg.format(option)) sys.exit(1) - return (option, fh) + return (fh, option) -def renumber_residues(fhandle, shifting_factor): - """Renumbers residues by adding/subtracting a factor from the original - numbering. +def run(fhandle, shifting_factor): """ + Renumber residues by a factor. + Adds/subtracts a factor from the original numbering. + + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + shifting_factor : int + The shifting factor. + + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. 
+ """ records = ('ATOM', 'HETATM', 'TER', 'ANISOU') for line in fhandle: if line.startswith(records): @@ -123,12 +138,15 @@ def renumber_residues(fhandle, shifting_factor): yield line +renumber_residues = run + + def main(): # Check Input - shifting_factor, pdbfh = check_input(sys.argv[1:]) + pdbfh, shifting_factor = check_input(sys.argv[1:]) # Do the job - new_pdb = renumber_residues(pdbfh, shifting_factor) + new_pdb = run(pdbfh, shifting_factor) # Output results try: diff --git a/pdbtools/pdb_sort.py b/pdbtools/pdb_sort.py index c6038c04..db6b9516 100644 --- a/pdbtools/pdb_sort.py +++ b/pdbtools/pdb_sort.py @@ -123,13 +123,24 @@ def check_input(args): sys.stderr.write(__doc__) sys.exit(1) - return (option, fh) + return (fh, option) -def sort_file(fhandle, sorting_keys): - """Sorts the contents of the PDB file. +def run(fhandle, sorting_keys): """ + Sort the contents of the PDB file. + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + Yields + ------ + str (line-by-line) + The sorted PDB lines. + """ # Sort keys chain_key = lambda x: x[21] # chain id resid_key = lambda x: (int(x[22:26]), x[26]) # resid, icode @@ -218,9 +229,12 @@ def sort_file(fhandle, sorting_keys): yield anisou_record +sort_file = run + + def main(): # Check Input - chain, pdbfh = check_input(sys.argv[1:]) + pdbfh, chain = check_input(sys.argv[1:]) # Do the job new_pdb = sort_file(pdbfh, chain) diff --git a/pdbtools/pdb_splitchain.py b/pdbtools/pdb_splitchain.py index 871fa961..32c4be3d 100644 --- a/pdbtools/pdb_splitchain.py +++ b/pdbtools/pdb_splitchain.py @@ -71,11 +71,17 @@ def check_input(args): return fh -def split_chain(fhandle): - """Splits the contents of the PDB file into new files, each containing a chain - of the original file +def run(fhandle): """ + Split the PDB into its different chains. + Writes a new file to the disk for each chain. Non-record lines are + ignored. 
+ + Parameters + ---------- + fhandle : an iterable giving the PDB file line-by-line + + """ fname_root = fhandle.name[:-4] if fhandle.name != '' else 'output' basename = os.path.basename(fname_root) @@ -98,12 +104,15 @@ def split_chain(fhandle): fh.write(''.join(lines)) +split_chain = run + + def main(): # Check Input pdbfh = check_input(sys.argv[1:]) # Do the job - split_chain(pdbfh) + run(pdbfh) # last line of the script # We can close it even if it is sys.stdin diff --git a/pdbtools/pdb_splitmodel.py b/pdbtools/pdb_splitmodel.py index 390d8ccf..5702fa5b 100644 --- a/pdbtools/pdb_splitmodel.py +++ b/pdbtools/pdb_splitmodel.py @@ -71,11 +71,17 @@ def check_input(args): return fh -def split_model(fhandle): - """Splits the contents of the PDB file into new files, each containing a - MODEL in the original file +def run(fhandle): """ + Split PDB into MODELS. + Each MODEL is saved to a different file. Non-record lines are + ignored. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + """ fname_root = fhandle.name[:-4] if fhandle.name != '' else 'pdbfile' basename = os.path.basename(fname_root) @@ -95,12 +101,15 @@ def split_model(fhandle): model_lines.append(line) +split_model = run + + def main(): # Check Input pdbfh = check_input(sys.argv[1:]) # Do the job - split_model(pdbfh) + run(pdbfh) # last line of the script # We can close it even if it is sys.stdin diff --git a/pdbtools/pdb_splitseg.py b/pdbtools/pdb_splitseg.py index 0c2a56a2..edce0fd8 100644 --- a/pdbtools/pdb_splitseg.py +++ b/pdbtools/pdb_splitseg.py @@ -71,11 +71,17 @@ def check_input(args): return fh -def split_segment(fhandle): - """Splits the contents of the PDB file into new files, each containing a - segment of the original file. +def run(fhandle): """ + Split PDB into segments. + Each segment is saved to a different file. Non-record lines are + ignored. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. 
+ """ fname_root = fhandle.name[:-4] if fhandle.name != '' else 'output' basename = os.path.basename(fname_root) @@ -101,12 +107,15 @@ def split_segment(fhandle): fh.write(''.join(lines)) +split_segment = run + + def main(): # Check Input pdbfh = check_input(sys.argv[1:]) # Do the job - split_segment(pdbfh) + run(pdbfh) # last line of the script # We can close it even if it is sys.stdin diff --git a/pdbtools/pdb_tidy.py b/pdbtools/pdb_tidy.py index 73351264..c355189f 100644 --- a/pdbtools/pdb_tidy.py +++ b/pdbtools/pdb_tidy.py @@ -103,15 +103,27 @@ def check_input(args): sys.stderr.write(__doc__) sys.exit(1) - return (option, fh) + return (fh, option) -def tidy_pdbfile(fhandle, strict=False): - """Adds TER/END statements and pads all lines to 80 characters. - - If strict is True, does not add TER statements at intra-chain breaks. +def run(fhandle, strict=False): """ + Add TER/END statements and pads all lines to 80 characters. + + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + strict : bool + If True, does not add TER statements at intra-chain breaks. + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. + """ not_strict = not strict def make_TER(prev_line): @@ -219,12 +231,15 @@ def make_TER(prev_line): yield "{:<80}\n".format("END") +tidy_pdbfile = run + + def main(): # Check Input - strict, pdbfh = check_input(sys.argv[1:]) + pdbfh, strict = check_input(sys.argv[1:]) # Do the job - new_pdb = tidy_pdbfile(pdbfh, strict) + new_pdb = run(pdbfh, strict) try: _buffer = [] diff --git a/pdbtools/pdb_tocif.py b/pdbtools/pdb_tocif.py index 718f307e..89b44de7 100644 --- a/pdbtools/pdb_tocif.py +++ b/pdbtools/pdb_tocif.py @@ -80,10 +80,21 @@ def pad_line(line): return line[:81] # 80 + newline character -def convert_to_mmcif(fhandle): - """Converts a structure in PDB format to mmCIF format. +def run(fhandle): """ + Convert a structure in PDB format to mmCIF format. 
+ This function is a generator. + + Parameters + ---------- + fhandle : an iterable giving the PDB file line-by-line. + + Yields + ------ + str (line-by-line) + The structure in mmCIF format. + """ _pad_line = pad_line # The spacing here is just aesthetic purposes when printing the file @@ -183,12 +194,15 @@ def convert_to_mmcif(fhandle): yield '#' # close block +convert_to_mmcif = run + + def main(): # Check Input pdbfh = check_input(sys.argv[1:]) # Do the job - new_cif = convert_to_mmcif(pdbfh) + new_cif = run(pdbfh) try: _buffer = [] diff --git a/pdbtools/pdb_tofasta.py b/pdbtools/pdb_tofasta.py index 18bbfb25..2355831a 100644 --- a/pdbtools/pdb_tofasta.py +++ b/pdbtools/pdb_tofasta.py @@ -105,14 +105,28 @@ def check_input(args): sys.stderr.write(__doc__) sys.exit(1) - return (option, fh) + return (fh, option) -def pdb_to_fasta(fhandle, multi): - """Reads residue names of ATOM/HETATM records and exports them to a FASTA - file. +def run(fhandle, multi): """ + Read residue names of ATOM/HETATM records and exports them to a FASTA + file. + + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + multi : bool + Whether to concatenate FASTA of multiple chains. + + Yields + ------ + str + The different FASTA contents. 
+ """ res_codes = [ # 20 canonical amino acids ('CYS', 'C'), ('ASP', 'D'), ('SER', 'S'), ('GLN', 'Q'), @@ -170,12 +184,15 @@ def pdb_to_fasta(fhandle, multi): yield ''.join(fmt_seq) +pdb_to_fasta = run + + def main(): # Check Input - multi, pdbfh = check_input(sys.argv[1:]) + pdbfh, multi = check_input(sys.argv[1:]) # Do the job - fasta = pdb_to_fasta(pdbfh, multi) + fasta = run(pdbfh, multi) # Output results try: diff --git a/pdbtools/pdb_uniqname.py b/pdbtools/pdb_uniqname.py index dbe557a2..d5372c44 100644 --- a/pdbtools/pdb_uniqname.py +++ b/pdbtools/pdb_uniqname.py @@ -71,10 +71,21 @@ def check_input(args): return fh -def rename_atoms(fhandle): - """Renames HETATM atoms on each residue based on their element. +def run(fhandle): """ + Rename HETATM atoms on each residue based on their element. + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. + """ prev_res = None for line_idx, line in enumerate(fhandle): if line.startswith('HETATM'): @@ -101,12 +112,15 @@ def rename_atoms(fhandle): yield line +rename_atoms = run + + def main(): # Check Input pdbfh = check_input(sys.argv[1:]) # Do the job - new_pdb = rename_atoms(pdbfh) + new_pdb = run(pdbfh) try: _buffer = [] diff --git a/pdbtools/pdb_validate.py b/pdbtools/pdb_validate.py index 5b26d11c..29274078 100644 --- a/pdbtools/pdb_validate.py +++ b/pdbtools/pdb_validate.py @@ -72,14 +72,22 @@ def check_input(args): return fh -def check_pdb_format(fhandle): +def run(fhandle): """ - Compares each ATOM/HETATM line with the format defined on the official - PDB website. + Compare each ATOM/HETATM line with the format defined on the + official PDB website. http://deposit.rcsb.org/adit/docs/pdb_atom_format.html - """ + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + Returns + ------- + int + 1 if error was found. 0 if no errors were found. 
+ """ has_error = False _fmt_check = ( ('Atm. Num.', (slice(6, 11), re.compile(r'[\d\s]+'))), @@ -161,12 +169,15 @@ def _make_pointer(column): return 0 +check_pdb_format = run + + def main(): # Check Input pdbfh = check_input(sys.argv[1:]) # Do the job - retcode = check_pdb_format(pdbfh) + retcode = run(pdbfh) # last line of the script # We can close it even if it is sys.stdin diff --git a/pdbtools/pdb_wc.py b/pdbtools/pdb_wc.py index 20356abc..1c1e419a 100644 --- a/pdbtools/pdb_wc.py +++ b/pdbtools/pdb_wc.py @@ -112,11 +112,34 @@ def check_input(args): sys.stderr.write(emsg.format(diff)) sys.exit(1) - return (option, fh) + return (fh, option) -def summarize_file(fhandle, option): - """Returns summary of models, chains, residue, and atoms. +def run(fhandle, option): + """ + Report on PDB models, chains, residue, and atoms. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + option : str + String with the characters of the options. + By default, this tool produces a general summary, but you can + use several options to produce focused but more detailed + summaries: + [m] - no. of models. + [c] - no. of chains (plus per-model if multi-model file). + [r] - no. of residues (plus per-model if multi-model file). + [a] - no. of atoms (plus per-model if multi-model file). + [h] - no. of HETATM (plus per-model if multi-model file). + [o] - presence of disordered atoms (altloc). + [i] - presence of insertion codes. + + Returns + ------- + None + Writes to `sys.stdout`. """ models = set() @@ -242,12 +265,15 @@ def summarize_file(fhandle, option): ) +summarize_file = run + + def main(): # Check Input - option, pdbfh = check_input(sys.argv[1:]) + pdbfh, option = check_input(sys.argv[1:]) # Do the job - summarize_file(pdbfh, option) + run(pdbfh, option) # last line of the script # We can close it even if it is sys.stdin