From 302c9d794b4a78f13160d63d4f57196c5e2b09d9 Mon Sep 17 00:00:00 2001
From: Calvin <calvinpieters@gmail.com>
Date: Mon, 19 Aug 2024 14:52:56 +0300
Subject: [PATCH] Gaussian Trsh: Corrected and Improved Max Steps Exceeded
 Error

- Removed 'SCF' from Number of steps exceeded error
- Changed the logging info for 'scf=' and 'opt=' to better suit code additions/changes in the future
- Added more troubleshooting options in the trsh_keyword_opt_maxcycles function and also an opt formatter for trsh_keyword
- Created a function to choose between optimisation methods based on a preferred order (could be extended in the future to other parameters that need a preferred method order)
---
 arc/job/trsh.py | 74 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 59 insertions(+), 15 deletions(-)

diff --git a/arc/job/trsh.py b/arc/job/trsh.py
index df27442b5c..cafb80e90a 100644
--- a/arc/job/trsh.py
+++ b/arc/job/trsh.py
@@ -94,7 +94,7 @@ def determine_ess_status(output_path: str,
                         cycle_issue = False
                         for j in range(i,len(reverse_lines)):
                             if 'Number of steps exceeded' in reverse_lines[j]:
-                                keywords = ['MaxOptCycles', 'GL9999', 'SCF']
+                                keywords = ['MaxOptCycles', 'GL9999']
                                 error = 'Maximum optimization cycles reached.'
                                 cycle_issue = True
                                 line = 'Number of steps exceeded'
@@ -897,14 +897,14 @@ def trsh_ess_job(label: str,
 
         # Check if SCF is in the keyword
         ess_trsh_methods, trsh_keyword, couldnt_trsh = trsh_keyword_scf(job_status, ess_trsh_methods, trsh_keyword, couldnt_trsh)
-        if 'scf=(maxcycle=512)' in ess_trsh_methods:
-            logger_info.append('using scf=(maxcycle=512)')
-        if 'scf=(NDamp=30)' in ess_trsh_methods:
-            logger_info.append('using scf=(NDamp=30)')
-        if 'scf=(qc)' in ess_trsh_methods:
-            logger_info.append('using scf=(qc)')
-        if 'scf=(NoDIIS)' in ess_trsh_methods:
-            logger_info.append('using scf=(NoDIIS)')
+        if 'scf=' in ess_trsh_methods:
+            add_comma = False
+            for i in ess_trsh_methods:
+                if 'scf=' in i and not add_comma:
+                    add_comma = True
+                    logger_info.append(f'using {i}')
+                elif 'scf=' in i and add_comma:
+                    logger_info.append(f'{i}')
         
         # Check if InaccurateQuadrature in the keyword
         ess_trsh_methods, trsh_keyword, couldnt_trsh = trsh_keyword_inaccurate_quadrature(job_status, ess_trsh_methods, trsh_keyword, couldnt_trsh)
@@ -922,9 +922,15 @@ def trsh_ess_job(label: str,
         # Troubleshoot by increasing opt max cycles
         #P opt=(calcfc,maxstep=5,tight,maxcycle=200) guess=mix wb97xd/def2tzvp integral=(grid=ultrafine, Acc2E=14) IOp(2/9=2000) scf=(direct,tight,maxcycle=512) iop(3/33=1)
         ess_trsh_methods, trsh_keyword, couldnt_trsh = trsh_keyword_opt_maxcycles(job_status, ess_trsh_methods, trsh_keyword, couldnt_trsh)
-        if 'opt=(maxcycle=200)' in ess_trsh_methods:
-            logger_info.append('using opt=(maxcycle=200)')
-        
+        # Add to the log message any troubleshooting methods that contain 'opt='
+        if 'opt=' in ess_trsh_methods:
+            add_comma = False
+            for i in ess_trsh_methods:
+                if 'opt=' in i and not add_comma:
+                    add_comma = True
+                    logger_info.append(f'using {i}')
+                elif 'opt=' in i and add_comma:
+                    logger_info.append(f'{i}')
 
 
         # Check if memory is in the keyword
@@ -1797,7 +1803,7 @@ def trsh_keyword_scf(job_status, ess_trsh_methods, trsh_keyword, couldnt_trsh) -
         couldnt_trsh = False
         trsh_keyword.append('guess=INDO')
     # If we have attempted all scf methods above, then we will try last resort methods
-    if 'SCF' in job_status['keywords'] and 'scf=(maxcycle=128)' in ess_trsh_methods and 'scf=(qc)' in ess_trsh_methods and 'scf=(NDamp=30)' in ess_trsh_methods and 'scf=(NoDIIS)' in ess_trsh_methods and 'guess=INDO' in ess_trsh_methods \
+    if 'SCF' in job_status['keywords'] and 'scf=(qc)' in ess_trsh_methods and 'scf=(NDamp=30)' in ess_trsh_methods and 'scf=(NoDIIS)' in ess_trsh_methods and 'guess=INDO' in ess_trsh_methods \
         and 'scf=(Fermi)' not in ess_trsh_methods and 'scf=(Noincfock)' not in ess_trsh_methods and 'scf=(NoVarAcc)' not in ess_trsh_methods:
         # Uses Fermi broadening to help SCF convergence
         ess_trsh_methods.append('scf=(Fermi)')
@@ -1844,11 +1850,34 @@ def trsh_keyword_opt_maxcycles(job_status, ess_trsh_methods, trsh_keyword, could
     """
     Check if the job requires change of opt(maxcycle=200)
     """
-    
-    if 'MaxOptCycles' in job_status['keywords'] and 'opt=(maxcycles=200)' not in ess_trsh_methods:
+    opt_pattern = r"opt=\((.*?)\)"
+    if 'MaxOptCycles' in job_status['keywords'] and 'opt=(maxcycle=200)' not in ess_trsh_methods:
         ess_trsh_methods.append('opt=(maxcycle=200)')
         trsh_keyword.append('opt=(maxcycle=200)')
         couldnt_trsh = False
+    elif 'MaxOptCycles' in job_status['keywords'] and 'opt=(RFO)' not in ess_trsh_methods:
+        ess_trsh_methods.append('opt=(RFO)')
+        trsh_keyword.append('opt=(RFO)')
+        couldnt_trsh = False
+    elif 'MaxOptCycles' in job_status['keywords']  and 'opt=(RFO)' in ess_trsh_methods and 'opt=(GDIIS)' not in ess_trsh_methods:
+        ess_trsh_methods.append('opt=(GDIIS)')
+        trsh_keyword.append('opt=(GDIIS)')
+        couldnt_trsh = False
+    elif 'MaxOptCycles' in job_status['keywords']  and 'opt=(RFO)' in ess_trsh_methods and 'opt=(GDIIS)' in ess_trsh_methods and 'opt=(GEDIIS)' not in ess_trsh_methods:
+        ess_trsh_methods.append('opt=(GEDIIS)')
+        trsh_keyword.append('opt=(GEDIIS)')
+        couldnt_trsh = False
+    
+    if any('opt' in keyword for keyword in ess_trsh_methods):
+        opt_list = [match for element in ess_trsh_methods for match in re.findall(opt_pattern, element)] if any(re.search(opt_pattern, element) for element in ess_trsh_methods) else []
+
+        if opt_list:
+
+            filtered_methods = prioritize_opt_methods(opt_list)
+
+            new_opt_keyword = 'opt=(' + ','.join(filtered_methods) + ')'
+
+            trsh_keyword = [kw if not kw.startswith('opt') else new_opt_keyword for kw in trsh_keyword]
     
     return ess_trsh_methods, trsh_keyword, couldnt_trsh
 
@@ -1895,3 +1924,18 @@ def trsh_keyword_inaccurate_quadrature(job_status, ess_trsh_methods, trsh_keywor
         trsh_keyword.append('scf=(' + ','.join(scf_list) + ')')
     
     return ess_trsh_methods, trsh_keyword, couldnt_trsh
+
+def prioritize_opt_methods(opt_methods):
+    """Return ``opt_methods`` with only one of GEDIIS/GDIIS/RFO kept (the first one present, checked in that order)."""
+    preferred_order = ['GEDIIS', 'GDIIS', 'RFO']
+    selected_method = None
+    # Pick the first entry of preferred_order that appears in opt_methods; stays None if none appears.
+    for method in preferred_order:
+        if method in opt_methods:
+            selected_method = method
+            break
+    # Entries not listed in preferred_order pass through unchanged, in their original order.
+    filtered_methods = [method for method in opt_methods if method not in preferred_order or method == selected_method]
+
+    return filtered_methods
+