diff --git a/ai_ta_backend/database/sql.py b/ai_ta_backend/database/sql.py
index a8287234..da854891 100644
--- a/ai_ta_backend/database/sql.py
+++ b/ai_ta_backend/database/sql.py
@@ -11,7 +11,7 @@ def __init__(self):
     # Create a Supabase client
     self.supabase_client = supabase.create_client(  # type: ignore
         supabase_url=os.environ['SUPABASE_URL'], supabase_key=os.environ['SUPABASE_API_KEY'])
-
+
   def getAllMaterialsForCourse(self, course_name: str):
     return self.supabase_client.table(
         os.environ['SUPABASE_DOCUMENTS_TABLE']).select('course_name, s3_path, readable_filename, url, base_url').eq(
@@ -134,3 +134,5 @@ def getConversation(self, course_name: str, key: str, value: str):
 
   def getDisabledDocGroups(self, course_name: str):
     return self.supabase_client.table("doc_groups").select("name").eq("course_name", course_name).eq("enabled", False).execute()
+  def getCourseDocumentByS3Path(self, course_name: str, s3_path: str):
+    return self.supabase_client.table("documents").select("id, course_name, readable_filename, url, base_url, s3_path, created_at").eq("course_name", course_name).eq("s3_path", s3_path).execute()
\ No newline at end of file
diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py
index 7a746c89..156b5993 100644
--- a/ai_ta_backend/main.py
+++ b/ai_ta_backend/main.py
@@ -40,6 +40,7 @@ from ai_ta_backend.service.retrieval_service import RetrievalService
 from ai_ta_backend.service.sentry_service import SentryService
 from ai_ta_backend.service.workflow_service import WorkflowService
+from ai_ta_backend.utils.pub_ingest import downloadSpringerFulltext, downloadWileyFulltext
 
 app = Flask(__name__)
 CORS(app)
@@ -480,6 +481,56 @@ def run_flow(service: WorkflowService) -> Response:
   return response
 
 
+@app.route('/get-springer-fulltext', methods=['GET'])
+def get_springer_data():
+  course_name: str = request.args.get('course_name', default='', type=str)
+  issn = request.args.get('issn', default='', type=str)
+  subject = request.args.get('subject', default='', type=str)
+  journal = request.args.get('journal', default='', type=str)
+  title = request.args.get('title', default='', type=str)
+  doi = request.args.get('doi', default='', type=str)
+
+  print("In /get-springer-fulltext")
+
+  if (issn == '' and subject == '' and journal == '' and title == '' and doi == '') or course_name == '':
+    # proper web error "400 Bad request"
+    abort(
+        400,
+        description=
+        "Missing required parameters: at least one of 'issn', 'subject', 'journal', 'title' or 'doi', plus 'course_name', must be provided."
+    )
+
+  fulltext = downloadSpringerFulltext(issn, subject, journal, title, doi, course_name)
+
+  response = jsonify(fulltext)
+  response.headers.add('Access-Control-Allow-Origin', '*')
+  return response
+
+
+@app.route('/get-wiley-fulltext', methods=['POST'])
+def get_wiley_data():
+  data = request.get_json()
+  print(data)
+
+  course_name = data.get('course_name', '')
+  issn = data.get('issn', [])
+
+  print("In /get-wiley-fulltext")
+
+  if issn == [] or course_name == '':
+    # proper web error "400 Bad request"
+    abort(
+        400,
+        description=
+        "Missing required parameters: 'issn' (list of journal ISSNs) and 'course_name' must be provided."
+    )
+
+  fulltext = downloadWileyFulltext(course_name, issn)
+
+  response = jsonify(fulltext)
+  response.headers.add('Access-Control-Allow-Origin', '*')
+  return response
+
+
 def configure(binder: Binder) -> None:
   binder.bind(RetrievalService, to=RetrievalService, scope=RequestScope)
   binder.bind(PosthogService, to=PosthogService, scope=SingletonScope)
diff --git a/ai_ta_backend/utils/nal_data_mining.py b/ai_ta_backend/utils/nal_data_mining.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ai_ta_backend/utils/nal_download.py b/ai_ta_backend/utils/nal_download.py
new file mode 100644
index 00000000..ca4d128c
--- /dev/null
+++ b/ai_ta_backend/utils/nal_download.py
@@ -0,0 +1,506 @@
+import os
+from supabase import create_client, Client
+import requests
+import boto3
+from dotenv import load_dotenv
+from crossref.restful import Works
+import datetime
+import time
+
+load_dotenv()
+print("Supabase URL: ", os.getenv("SUPABASE_URL"))
+print("Supabase API key set: ", os.getenv("SUPABASE_API_KEY") is not None)  # do not print the secret itself
+
+# Initialize the Supabase client
+SUPABASE_CLIENT: Client = create_client(os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_API_KEY"))
+SPRINGER_API_KEY = os.environ.get('SPRINGER_API_KEY')
+DOWNLOAD_LOG = "download_log.txt"
+
+S3_CLIENT = boto3.client('s3', aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'), aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'))
+
+AWS_BUCKET = os.getenv('S3_BUCKET_NAME')
+
+CC_LICENSES = {
+    "http://creativecommons.org/licenses/by/4.0/": "CC BY",
+    "http://creativecommons.org/licenses/by-nc/4.0/": "CC BY-NC",
+    "http://creativecommons.org/licenses/by-nc-nd/4.0/": "CC BY-NC-ND",
+    "http://creativecommons.org/licenses/by-nc-sa/4.0/": "CC BY-NC-SA"
+}
+
+OTHER_LICENSES = {
+    "http://onlinelibrary.wiley.com/termsAndConditions#vor": "wiley_tnc",
+    "http://onlinelibrary.wiley.com/termsAndConditions#am": "wiley_tnc",
+    "http://doi.wiley.com/10.1002/tdm_license_1.1": "wiley_tdm"
+}
+
+
+def main():
+    data = [1, 2, 3]
+    # fetch records from SQL
+    while len(data) > 0:
+        response = SUPABASE_CLIENT.table("nal_publications").select("doi_number, publisher, metadata").eq("ingested", False).eq("downloadable", True).neq("publisher", "Wiley").limit(1000).execute()
+        data = response.data
+        print("No. of records: ", len(data))
+        for record in data:
+            if 'Springer' in record['publisher']:
+                # route to springer download
+                result = downloadSpringerFulltext(doi=record['doi_number'])
+
+            elif 'Wiley' in record['publisher']:
+                # route to wiley download (currently skipped; Wiley is handled separately)
+                print('Wiley')
+                continue
+                # unreachable while the `continue` above is in place:
+                # result = downloadWileyPDF(doi=record['doi_number'], metadata=record['metadata'])
+                # time.sleep(10)  # sleep for 10 seconds to avoid rate limiting
+
+            elif 'Elsevier' in record['publisher']:
+                # update supabase
+                update_info = {"notes": "Elsevier articles not downloadable.", "downloadable": False, "modified_date": datetime.datetime.now().isoformat()}
+                response = SUPABASE_CLIENT.table("nal_publications").update(update_info).eq("doi_number", record['doi_number']).execute()
+
+            else:
+                # regular file save
+                print("publisher name: ", record['publisher'])
+                result = download_article_from_url(record['doi_number'], record['metadata'])
+                print(result)
+                #time.sleep(10)
+
+    return "Success"
+
+
+def download_article_from_url(doi, metadata):
+    print("in download_article_from_url: ", doi)
+
+    if 'link' not in metadata:
+        print("No link")
+        # update supabase
+        update_info = {"notes": "Download link absent.", "downloadable": False, "modified_date": datetime.datetime.now().isoformat()}
+        SUPABASE_CLIENT.table("nal_publications").update(update_info).eq("doi_number", doi).execute()
+        return "No download link present"
+    else:
+        # save to local
+        print("Link found")
+        pdf_link = metadata['link'][0]['URL']
+
+    if 'license' not in metadata:
+        print("No license")
+        # update supabase
+        update_info = {"notes": "License absent.", "downloadable": False, "modified_date": datetime.datetime.now().isoformat()}
+        SUPABASE_CLIENT.table("nal_publications").update(update_info).eq("doi_number", doi).execute()
+        return {"error": "License not found."}
+
+    license = get_license(metadata['license'][0]['URL'])
+
+    status = download_pdf_in_chunks(url=pdf_link, doi=doi)
+    if 'failed' in status:
+        # update supabase
+        print("Error in PDF download: ", status['failed'])
+        update_info = {"notes": str(status['failed']), "downloadable": False, "modified_date": datetime.datetime.now().isoformat()}
+        SUPABASE_CLIENT.table("nal_publications").update(update_info).eq("doi_number", doi).execute()
+        return {"error": "Error in PDF download."}
+    else:
+        filepath = status['success']
+
+    updated_metadata = {
+        "doi": doi,
+        "filename": filepath.split("/")[-1],
+        "file_path": filepath,
+        "publisher": metadata['publisher'],
+        "license": license,
+    }
+    print("Updated metadata: ", updated_metadata)
+
+    ingest_status = upload_and_ingest(filepath, updated_metadata, doi)
+    print(ingest_status)
+
+    return {"success": "Downloaded and ingested successfully."}
+
+
+def upload_and_ingest(filepath, metadata, doi):
+    """
+    Uploads a file to S3 and ingests it into cropwizard-1.5.
+    """
+    filename = os.path.basename(filepath)
+
+    s3_path = "courses/cropwizard-1.5/" + filename
+
+    S3_CLIENT.upload_file(filepath, AWS_BUCKET, s3_path)
+
+    publisher = metadata['publisher']
+    if 'Springer' in publisher:
+        publisher = "Springer"
+    elif 'Wiley' in publisher:
+        publisher = "Wiley"
+
+    # ingest
+    ingest_url = "https://ingest-task-queue-6ee4a59-v12.app.beam.cloud"
+    ingest_headers = {
+        'Accept': '*/*',
+        'Accept-Encoding': 'gzip, deflate',
+        'Authorization': f"Bearer {os.environ['BEAM_API_KEY']}",
+        'Content-Type': 'application/json',
+    }
+    doi_url = f"https://doi.org/{doi}"
+    ingest_payload = {
+        "course_name": "cropwizard-1.5",
+        "s3_paths": [s3_path],
+        "readable_filename": filename,
+        "url": doi_url,
+        "base_url": "",
+        "groups": ["Research Papers", "NAL", publisher]
+    }
+
+    if 'license' in metadata and metadata['license'] not in ['Unknown', 'unknown']:
+        ingest_payload['groups'].append(metadata['license'])
+
+    print("FINAL INGEST PAYLOAD: ", ingest_payload)
+    ingest_response = requests.post(ingest_url, headers=ingest_headers, json=ingest_payload)
+
+    # update supabase
+    update_info = {"ingested": True, "modified_date": datetime.datetime.now().isoformat()}
+    response = SUPABASE_CLIENT.table("nal_publications").update(update_info).eq("doi_number", doi).execute()
+    return "success"
+
+
+def download_pdf_in_chunks(url, doi, chunk_size=1024):
+    try:
+        # create directory to store files
+        directory = "other_papers"
+        if not os.path.exists(directory):
+            os.makedirs(directory)
+
+        # Send a GET request to the URL with stream=True to download in chunks
+        response = requests.get(url, stream=True)
+
+        # Check if the request was successful
+        if response.status_code == 200:
+            # Open the file in binary write mode
+            filename = doi.replace("/", "_")
+            filepath = "other_papers/" + filename + ".pdf"
+            with open(filepath, 'wb') as file:
+                # Iterate over the response in chunks and write each to the file
+                for chunk in response.iter_content(chunk_size=chunk_size):
+                    if chunk:  # Filter out keep-alive chunks
+                        file.write(chunk)
+            print(f"PDF successfully downloaded and saved as {filepath}")
+
+            return {"success": filepath}
+
+        else:
+            print(f"Failed to download PDF. Status code: {response.status_code}")
+
+            # update supabase
+            update_info = {"notes": f"Failed to download PDF (anti-bot). Status code: {response.status_code}",
+                           "downloadable": False, "modified_date": datetime.datetime.now().isoformat()}
+            SUPABASE_CLIENT.table("nal_publications").update(update_info).eq("doi_number", doi).execute()
+
+            return {"failed": response.status_code}
+
+    except requests.exceptions.RequestException as e:
+        print(f"An error occurred: {e}")
+        return {"failed": e}
+
+
+############# SPRINGER DOWNLOAD #############
+
+def downloadSpringerFulltext(issn=None, subject=None, journal=None, title=None, doi=None, course_name=None):
+    """
+    This function uses the Springer Nature API to download open-access journal articles.
+    Args:
+        issn: limit to the ISSN of the journal/book
+        subject: limit articles to a specific subject - Chemistry, Physics, etc.
+        journal: limit to keywords occurring in the journal title
+        title: limit to keywords occurring in the article title
+        doi: download a single article by DOI
+        course_name: course the downloaded articles are ingested into
+    The initial API response returns a list of articles with metadata.
+ + """ + print("in downloadSpringerFulltext") + # create directory to store files + directory = os.path.join(os.getcwd(), 'springer_papers') + if not os.path.exists(directory): + os.makedirs(directory) + + # set headers + api_url = "http://api.springernature.com/openaccess/json?q=" + headers = {'Accept': 'application/json'} + + # form the query URL based on the input parameters received + if doi: + query_str = "doi:" + doi + elif issn: + query_str = "issn:" + issn + elif journal: + journal = "%22" + journal.replace(" ", "%20") + "%22" + query_str = "journal:" + journal + elif title: + title = "%22" + title.replace(" ", "%20") + "%22" + query_str = "title:" + title + elif subject: + query_str = "subject:" + subject + else: + return "No query parameters provided" + + main_url = api_url + query_str + "&api_key=" + str(SPRINGER_API_KEY) + print("Full URL: ", main_url) + + response = requests.get(main_url, headers=headers) + + if response.status_code != 200: + print("Error in accessing Springer API: ", response.text) + response = SUPABASE_CLIENT.table("nal_publications").update({"notes": f"Error in accessing Springer API. Status code: {response.text}", "downloadable": False, "modified_date": datetime.datetime.now().isoformat()}).eq("doi_number", doi).execute() + return "Error" + + data = response.json() + # check for total number of records + total_records = int(data['result'][0]['total']) + + if total_records == 0: + # update supabase record and exit + response = SUPABASE_CLIENT.table("nal_publications").update({"notes": "Article is not OA.", "downloadable": False, "modified_date": datetime.datetime.now().isoformat()}).eq("doi_number", doi).execute() + return "Article not OA." + else: + # download paper + download_info = downloadPDFSpringer(data['records'][0], directory) + + if 'error' in download_info: + response = SUPABASE_CLIENT.table("nal_publications").update({"notes": download_info['error'], "downloadable": False, "modified_date": datetime.datetime.now().isoformat()}).eq("doi_number", doi).execute() + else: + # ingest + print("Download info: ", download_info) + ingest_status = upload_and_ingest(download_info['file_path'], download_info, doi) + + return "success" + +def downloadPDFSpringer(record: dict, directory: str): + """ + This function takes a record from the Springer API response and downloads the PDF file. + It is called in a multi-process loop in downloadSpringerFulltext(). 
+    Args:
+        record: dictionary containing DOI and other metadata
+        directory: local directory to save the files
+    """
+    print("in downloadPDFSpringer")
+    headers = {'Accept': 'application/json'}
+
+    if len(record['url']) < 1:
+        return {"error": "No download link found for DOI: " + record['doi']}
+
+    # extract URL
+    url = record['url'][0]['value'] + "?api_key=" + str(SPRINGER_API_KEY)
+
+    url_response = requests.get(url, headers=headers)
+
+    if url_response.status_code != 200:
+        return {"error": "Error in accessing article link: " + str(url_response.status_code) + " - " + url_response.text}
+
+    url_data = url_response.json()
+
+    if 'license' in url_data:
+        license_url = url_data['license'][0]['URL']
+        license = get_license(license_url)
+        print("License: ", license)
+    else:
+        license = "unknown"
+        license_url = "unknown"
+
+    # extract PDF link
+    pdf_link = None
+    if 'link' not in url_data:
+        return {"error": "No link found for DOI: " + record['doi']}
+
+    links = url_data['link']
+    for link in links:
+        if link['content-type'] == 'application/pdf' and link['intended-application'] == 'text-mining':
+            pdf_link = link['URL']
+            break
+
+    if not pdf_link:
+        pdf_link = links[0]['URL']
+
+    if not pdf_link:
+        return {"error": "No PDF link found for DOI: " + record['doi']}
+
+    # download PDF
+    if 'doi' in record:
+        filename = record['doi'].replace("/", "_")
+    else:
+        filename = url_data['DOI'].replace("/", "_")
+
+    try:
+        response = requests.get(pdf_link)
+        if response.status_code != 200:
+            return {"error": "Error in downloading PDF: " + str(response.status_code) + " - " + response.text}
+
+        with open(directory + "/" + filename + ".pdf", "wb") as f:  # Open a file in binary write mode ("wb")
+            for chunk in response.iter_content(chunk_size=1024):  # Download in chunks
+                f.write(chunk)
+
+        # form metadata
+        metadata = {
+            "doi": record['doi'],
+            "publisher": record['publisher'],
+            "issn": record['issn'],
+            "license": license,
+            "license_url": license_url,
+            "filename": filename + ".pdf",
+            "file_path": directory + "/" + filename + ".pdf"
+        }
+        return metadata
+    except Exception as e:
+        return {"error": "Error in downloading PDF: " + str(e)}
+
+
+def downloadWileyPDF(doi, metadata):
+    """
+    This function downloads a PDF file from Wiley based on the DOI.
+ """ + print("in downloadWileyPDF") + try: + # create directory to store files + directory = "wiley_papers" + if not os.path.exists(directory): + os.makedirs(directory) + + api_key = os.environ.get("WILEY_TDM_TOKEN") + + # download PDF based on doi + base_url = "https://api.wiley.com/onlinelibrary/tdm/v1/articles/" + url = base_url + str(doi) + print("URL: ", url) + + headers = { + 'Wiley-TDM-Client-Token': api_key, + 'Content-Type': 'application/json' + } + + response = requests.get(url, headers=headers) + response.raise_for_status() + + filename = str(doi).replace("/", "_") + ".pdf" + with open(directory + "/" + filename, "wb") as f: # Open a file in binary write mode ("wb") + for chunk in response.iter_content(chunk_size=1024): # Download in chunks + f.write(chunk) + print("Downloaded: ", filename) + + # get license + license = get_license(metadata['license'][0]['URL']) + print("License: ", license) + + # route to upload and ingest + updated_metadata = { + "doi": doi, + "filename": filename, + "file_path": directory + "/" + filename, + "publisher": metadata['publisher'], + "license": license, + } + print("Updated metadata: ", updated_metadata) + + # call upload and ingest + ingest_status = upload_and_ingest(updated_metadata['file_path'], updated_metadata, doi) + + return {"success": "Downloaded and ingested successfully."} + except Exception as e: + print("Error: ", e) + # probably a 403 error - update supabase + update_info = {"notes": "403 client error (forbidden) in PDF download.", "downloadable": False, "modified_date": datetime.datetime.now().isoformat()} + response = SUPABASE_CLIENT.table("nal_publications").update(update_info).eq("doi_number", doi).execute() + return {"error": "403 client error (forbidden) in PDF download."} + + +def downloadWileyArticle(doi=None): + """ + This function fetches metadata from Crossref and downloads open access full text articles from Wiley. + """ + # create directory to store files + directory = os.path.join(os.getcwd(), 'wiley_papers') + if not os.path.exists(directory): + os.makedirs(directory) + + api_key = os.environ.get("WILEY_TDM_TOKEN") + metadata = {} + + # get metadata from Crossref + if doi: + # get article metadata + works = Works() + article_data = works.doi(doi) + print("Article license: ", article_data['license']) + + article_licenses = [] + + for item in article_data['license']: + article_licenses.append(item['URL']) + print("Licenses: ", article_licenses) + # check if the license is open access - variant of CC + for license in article_licenses: + if license in LICENSES: + print("License found: ", license) + if LICENSES[license] == "closed_access": + return "Article is not open access." + else: + metadata['license'] = LICENSES[license] + break + else: + return "License not found." 
+
+        metadata['doi'] = doi
+        metadata['title'] = article_data['title'][0]
+        metadata['journal'] = article_data['container-title'][0]
+        metadata['publisher'] = article_data['publisher']
+        metadata['issn'] = article_data['ISSN'][0]
+        metadata['url'] = article_data['URL']
+
+        print("Metadata: ", metadata)
+
+        # download PDF based on doi
+        base_url = "https://api.wiley.com/onlinelibrary/tdm/v1/articles/"
+        url = base_url + str(doi)
+
+        print("URL: ", url)
+
+        headers = {
+            'Wiley-TDM-Client-Token': api_key,
+            'Content-Type': 'application/json'
+        }
+
+        response = requests.get(url, headers=headers)
+        if response.status_code != 200:
+            # exponential backoff logic
+            print("Error in accessing article link, retrying: ", response.text)
+            return "Error in accessing article link: " + str(response.status_code) + " - " + response.text
+
+        filename = str(doi).replace("/", "_")
+        with open(directory + "/" + filename + ".pdf", "wb") as f:  # Open a file in binary write mode ("wb")
+            for chunk in response.iter_content(chunk_size=1024):  # Download in chunks
+                f.write(chunk)
+        print("Downloaded: ", filename)
+
+    # upload file to S3 bucket
+
+    # prep payload for beam ingest
+
+    return "success"
+
+
+def get_license(url: str) -> str:
+    # Define license matches
+    license_mapping = {
+        "by-nc-nd": "CC BY-NC-ND",
+        "by-nc-sa": "CC BY-NC-SA",
+        "by-nc": "CC BY-NC",
+        "by": "CC BY",
+    }
+
+    # Loop through the mapping and check if the URL contains the license string
+    for key, license in license_mapping.items():
+        if key in url:
+            return license
+
+    # Return 'Unknown' if no match is found
+    return "Unknown"
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/ai_ta_backend/utils/pub_ingest.py b/ai_ta_backend/utils/pub_ingest.py
new file mode 100644
index 00000000..cc59f928
--- /dev/null
+++ b/ai_ta_backend/utils/pub_ingest.py
@@ -0,0 +1,541 @@
+import os
+import json
+import time
+import pandas as pd
+import shutil
+import requests
+import supabase
+import concurrent.futures
+from crossref.restful import Works, Journals
+from ai_ta_backend.database import aws, sql
+import backoff
+
+SPRINGER_API_KEY = os.environ.get('SPRINGER_API_KEY')
+CC_LICENSES = {
+    "http://creativecommons.org/licenses/by/4.0/": "CC BY",
+    "http://creativecommons.org/licenses/by-nc/4.0/": "CC BY-NC",
+    "http://creativecommons.org/licenses/by-nc-nd/4.0/": "CC BY-NC-ND",
+    "http://creativecommons.org/licenses/by-nc-sa/4.0/": "CC BY-NC-SA"
+}
+
+OTHER_LICENSES = {
+    "http://onlinelibrary.wiley.com/termsAndConditions#vor": "wiley_tnc",
+    "http://onlinelibrary.wiley.com/termsAndConditions#am": "wiley_tnc",
+    "http://doi.wiley.com/10.1002/tdm_license_1.1": "wiley_tdm"
+}
+
+s3_client = aws.AWSStorage()
+aws_bucket = os.getenv('S3_BUCKET_NAME')
+supabase_client = supabase.create_client(  # type: ignore
+    supabase_url=os.environ['SUPABASE_URL'], supabase_key=os.environ['SUPABASE_API_KEY'])
+
+
+def downloadSpringerFulltext(issn=None, subject=None, journal=None, title=None, doi=None, course_name=None):
+    """
+    This function uses the Springer Nature API to download open-access journal articles.
+    Args:
+        issn: limit to the ISSN of the journal/book
+        subject: limit articles to a specific subject - Chemistry, Physics, etc.
+        journal: limit to keywords occurring in the journal title
+        title: limit to keywords occurring in the article title
+        doi: download a single article by DOI
+        course_name: course the downloaded articles are ingested into
+    The initial API response returns a list of articles with metadata.
+ + """ + print("in downloadSpringerFulltext") + # create directory to store files + directory = os.path.join(os.getcwd(), 'springer_papers') + if not os.path.exists(directory): + os.makedirs(directory) + + # set headers + api_url = "http://api.springernature.com/openaccess/json?q=" + headers = {'Accept': 'application/json'} + + # form the query URL based on the input parameters received + if doi: + query_str = "doi:" + doi + elif issn: + query_str = "issn:" + issn + elif journal: + journal = "%22" + journal.replace(" ", "%20") + "%22" + query_str = "journal:" + journal + elif title: + title = "%22" + title.replace(" ", "%20") + "%22" + query_str = "title:" + title + elif subject: + query_str = "subject:" + subject + else: + return "No query parameters provided" + + main_url = api_url + query_str + "&api_key=" + str(SPRINGER_API_KEY) + print("Full URL: ", main_url) + + response = requests.get(main_url, headers=headers) + print("Status: ", response.status_code) + + if response.status_code != 200: + return "Error: " + str(response.status_code) + " - " + response.text + + data = response.json() + # check for total number of records + total_records = int(data['result'][0]['total']) + print("Total records: ", total_records) + current_records = 0 + while current_records < total_records: + # check if nextPage exists + try: + if 'nextPage' in data: + next_page_url = "http://api.springernature.com" + data['nextPage'] + else: + next_page_url = None + + # multi-process all records in current page + # with concurrent.futures.ProcessPoolExecutor() as executor: + # results = [] + # for i in range(0, len(data['records']), 3): + # batch = data['records'][i:i+3] + # batch_results = [executor.submit(downloadPDFSpringer, record, directory) for record in batch] + # results.extend(batch_results) + # for f in concurrent.futures.as_completed(results): + # print(f.result()) + print("Total records: ", len(data['records'])) + + for i in range(len(data['records'])): + print("i: ", i) + print("Processing record: ", data['records'][i]) + print("\n") + article_metadata = downloadPDFSpringer(data['records'][i], directory) + article_metadata['issn'] = issn + + # write article metadata to CSV file + metadata_csv = "springer_metadata.csv" + metadata_df = pd.DataFrame([article_metadata]) + if not os.path.exists(metadata_csv): + metadata_df.to_csv(metadata_csv, index=False) + else: + metadata_df.to_csv(metadata_csv, mode='a', header=False, index=False) + + # update current records count + current_records += int(len(data['records'])) + + # if next page exists, update next page url and call the API again + if next_page_url: + # API key is already present in the URL + response = requests.get(next_page_url, headers=headers) + if response.status_code != 200: + return "Error in next page: " + str(response.status_code) + " - " + response.text + + data = response.json() + except Exception as e: + print(e) + + # print("Course name: ", course_name) + # prep payload for beam ingest + # ingest_data = [] + + # upload files to S3 bucket + # for file in os.listdir(directory): + # doi = file[:-4] + # doi = doi.replace("_", "/") + # doi_link = f"https://doi.org/{doi}" + # data = { + # "course_name": course_name, + # "groups": "springer_open", + # "s3_paths": "courses/" + course_name + "/" + file, # type: ignore + # "readable_filename": file, + # "base_url": "", + # "url": doi_link, + # "journal": "", + # } + # s3_path = "courses/" + course_name + "/" + file # type: ignore + # s3_client.upload_file(directory + "/" + file, aws_bucket, s3_path) 
+    #     ingest_data.append(data)
+
+    # # save ingest data to csv
+    # ingest_df = pd.DataFrame(ingest_data)
+    # csv_file = "publications_data.csv"
+    # if not os.path.exists(csv_file):
+    #     ingest_df.to_csv(csv_file, index=False)
+    # else:
+    #     ingest_df.to_csv(csv_file, mode='a', header=False, index=False)
+
+    # # call ingest
+    # beam_url = "https://3xn8l.apps.beam.cloud"
+    # headers = {
+    #     "Content-Type": "application/json",
+    #     "Authorization": "Basic " + os.getenv('BEAM_AUTH_TOKEN')  # type: ignore
+    # }
+
+    # pubs_data = pd.read_csv(csv_file)
+
+    # for row in pubs_data.iterrows():
+    #     payload = {
+    #         "course_name": "cropwizard-pro",
+    #         "s3_paths": [row[1]["s3_paths"]],
+    #         "readable_filename": row[1]["readable_filename"],
+    #         "base_url": "",
+    #         "url": row[1]["url"],
+    #         "groups": ["Springer", "CC-BY", "Research Paper"]
+    #     }
+    #     print(payload)
+    #     payload = json.dumps(data)
+    #     response = requests.post(beam_url, headers=headers, data=payload)
+
+    #     if response.status_code == 200:
+    #         print("Task status retrieved successfully!")
+    #     else:
+    #         print(f"Error: {response.status_code}. {response.text}")
+
+    # Delete files from local directory
+    # shutil.rmtree(directory)
+    # os.remove(csv_file)
+
+    return "success"
+
+
+def downloadPDFSpringer(record: dict, directory: str):
+    """
+    This function takes a record from the Springer API response and downloads the PDF file.
+    It is called for each record returned by downloadSpringerFulltext().
+    Args:
+        record: dictionary containing DOI and other metadata
+        directory: local directory to save the files
+    """
+    print("in downloadPDFSpringer")
+    headers = {'Accept': 'application/json'}
+
+    if len(record['url']) < 1:
+        return "No download link found for DOI: " + record['doi']
+
+    # extract URL
+    url = record['url'][0]['value'] + "?api_key=" + str(SPRINGER_API_KEY)
+    print("URL: ", url)
+    url_response = requests.get(url, headers=headers)
+    print("URL response: ", url_response.status_code)
+    if url_response.status_code != 200:
+        return "Error in accessing article link: " + str(url_response.status_code) + " - " + url_response.text
+
+    url_data = url_response.json()
+
+    if 'license' in url_data:
+        license_url = url_data['license'][0]['URL']
+        license = CC_LICENSES.get(license_url, license_url)
+        print("License: ", license)
+    else:
+        license = "unknown"
+        license_url = "unknown"
+
+    # extract PDF link
+    pdf_link = None
+    links = url_data['link']
+    for link in links:
+        if link['content-type'] == 'application/pdf' and link['intended-application'] == 'text-mining':
+            pdf_link = link['URL']
+            print("PDF Link: ", pdf_link)
+            break
+
+    if not pdf_link:
+        pdf_link = links[0]['URL']
+        print("PDF Link: ", pdf_link)
+    if not pdf_link:
+        return "No PDF link found for DOI: " + record['doi']
+
+    # download PDF
+    print("Downloading PDF: ", record['doi'])
+    if 'doi' in record:
+        filename = record['doi'].replace("/", "_")
+    else:
+        filename = url_data['DOI'].replace("/", "_")
+
+    try:
+        response = requests.get(pdf_link)
+        if response.status_code != 200:
+            return "Error in downloading PDF: " + str(response.status_code) + " - " + response.text
+
+        with open(directory + "/" + filename + ".pdf", "wb") as f:  # Open a file in binary write mode ("wb")
+            for chunk in response.iter_content(chunk_size=1024):  # Download in chunks
+                f.write(chunk)
+        print("Downloaded: ", filename)
+
+        # form metadata
+        metadata = {
+            "doi": record['doi'],
+            "publisher": record['publisher'],
+            "issn": record['issn'],
+            "license": license,
+            "license_url": license_url,
+            "metadata": url_data,
+        }
+        return metadata
+    except Exception as e:
+        return "Error in downloading PDF: " + str(e)
+
+
+def getCrossrefMetadata(issn: str):
+    """
+    Creates a CSV file with metadata for all articles in the given journal (ISSN).
+    """
+    try:
+        metadata = []
+        # get journal metadata
+        journals = Journals()
+        works = journals.works(issn=issn)
+        count = 0
+        no_license = 0
+        for item in works:
+            try:
+                count += 1
+                article_metadata = {}
+                # check if the license is open access - variant of CC
+                if 'license' not in item:
+                    no_license += 1
+                    continue
+                else:
+                    for license in item['license']:
+                        # check for creative commons license
+                        if license['URL'] in CC_LICENSES:
+                            article_metadata['license'] = CC_LICENSES[license['URL']]
+                            article_metadata['license_url'] = license['URL']
+                            break
+                        elif license['URL'] in OTHER_LICENSES:
+                            article_metadata['license'] = OTHER_LICENSES[license['URL']]
+                            article_metadata['license_url'] = license['URL']
+                        else:
+                            article_metadata['license'] = "unknown"
+                            article_metadata['license_url'] = license['URL']
+
+                article_metadata['doi'] = item['DOI']
+                if 'title' not in item:
+                    article_metadata['title'] = "No title found"
+                else:
+                    article_metadata['title'] = item['title'][0]
+                article_metadata['journal'] = item['container-title'][0]
+                article_metadata['publisher'] = item['publisher']
+                article_metadata['issn'] = item['ISSN'][0]
+                article_metadata['url'] = item['URL']
+                article_metadata['filename'] = item['DOI'].replace("/", "_") + ".pdf"
+                article_metadata['downloaded'] = "no"
+                metadata_csv = "wiley_metadata.csv"
+                metadata_df = pd.DataFrame([article_metadata])
+                if not os.path.exists(metadata_csv):
+                    metadata_df.to_csv(metadata_csv, index=False)
+                else:
+                    metadata_df.to_csv(metadata_csv, mode='a', header=False, index=False)
+                #metadata.append(article_metadata)
+                print("Processed: ", article_metadata['doi'])
+            except Exception as e:
+                print("Error processing article: ", article_metadata.get('doi', 'unknown DOI'), e)
+        print("Total articles: ", count)
+        # metadata_csv = "wiley_metadata.csv"
+        # metadata_df = pd.DataFrame(metadata)
+        # if not os.path.exists(metadata_csv):
+        #     metadata_df.to_csv(metadata_csv, index=False)
+        # else:
+        #     metadata_df.to_csv(metadata_csv, mode='a', header=False, index=False)
+
+        return "success"
+    except Exception as e:
+        return "Error: " + str(e)
+
+
+def downloadWileyFulltext(course_name=None, issn=[]):
+    """
+    This function fetches metadata from Crossref and downloads
+    full-text articles from a given journal from Wiley.
+ """ + # create directory to store files + directory = os.path.join(os.getcwd(), 'wiley_papers') + if not os.path.exists(directory): + os.makedirs(directory) + + + # fetch metadata + for item in issn: + metadata_status = getCrossrefMetadata(item) + print("Metadata status: ", metadata_status) + + # download PDFs based on metadata + metadata_csv = "wiley_metadata.csv" + if os.path.exists(metadata_csv): + metadata_df = pd.read_csv(metadata_csv) + metadata = metadata_df.to_dict(orient='records') + + for item in metadata: + try: + if item['license'] in ['CC BY', 'CC BY-NC', 'CC BY-NC-ND', 'CC BY-NC-SA'] and item['downloaded'] == 'no' and item['publisher'] == 'Wiley': + status = downloadWileyPDF(item['doi']) + print("Download status: ", status) + if status == "success": + item['downloaded'] = 'yes' + time.sleep(5) + except Exception as e: + print(e) + + #time.sleep(10) + + metadata_df = pd.DataFrame(metadata) + metadata_df.to_csv(metadata_csv, index=False) + + return "success" + + + # # prep payload for beam ingest + # ingest_data = [] + + # # upload files to S3 bucket + # for file in os.listdir(directory): + # doi = file[:-4] + # doi = doi.replace("_", "/") + # doi_link = f"https://doi.org/{doi}" + # data = { + # "course_name": course_name, + # "group": "wiley", + # "s3_paths": "courses/" + course_name + "/" + file, # type: ignore + # "readable_filename": file, + # "base_url": "", + # "url": doi_link, + # "journal": "", + # } + # s3_path = "courses/" + course_name + "/" + file # type: ignore + # s3_client.upload_file(directory + "/" + file, aws_bucket, s3_path) # type: ignore + # ingest_data.append(data) + + # # save ingest data to csv + # ingest_df = pd.DataFrame(ingest_data) + # csv_file = "publications_data.csv" + # if not os.path.exists(csv_file): + # ingest_df.to_csv(csv_file, index=False) + # else: + # ingest_df.to_csv(csv_file, mode='a', header=False, index=False) + + + # # call ingest + # beam_url = "https://41kgx.apps.beam.cloud" + # headers = { + # "Content-Type": "application/json", + # "Authorization": "Basic " + os.getenv('BEAM_AUTH_TOKEN') # type: ignore + # } + # for data in ingest_data: + # payload = json.dumps(data) + # response = requests.post(beam_url, headers=headers, data=payload) + # if response.status_code == 200: + # print("Task status retrieved successfully!") + # else: + # print(f"Error: {response.status_code}. {response.text}") + + # Delete files from local directory + #shutil.rmtree(directory) + + + +#@backoff.on_exception(backoff.expo, requests.exceptions.HTTPError, max_tries=1) +def downloadWileyPDF(doi=None): + """ + This function downloads a PDF file from Wiley based on the DOI. 
+ """ + try: + # create directory to store files + directory = os.path.join(os.getcwd(), 'wiley_papers') + if not os.path.exists(directory): + os.makedirs(directory) + + api_key = os.environ.get("WILEY_TDM_TOKEN") + + # download PDF based on doi + base_url = "https://api.wiley.com/onlinelibrary/tdm/v1/articles/" + url = base_url + str(doi) + print("URL: ", url) + + headers = { + 'Wiley-TDM-Client-Token': api_key, + 'Content-Type': 'application/json' + } + + response = requests.get(url, headers=headers) + response.raise_for_status() + + filename = str(doi).replace("/", "_") + ".pdf" + with open(directory + "/" + filename, "wb") as f: # Open a file in binary write mode ("wb") + for chunk in response.iter_content(chunk_size=1024): # Download in chunks + f.write(chunk) + print("Downloaded: ", filename) + + return "success" + except Exception as e: + print("Error: ", e) + return "error" + + +def downloadWileyArticle(doi=None): + """ + This function fetches metadata from Crossref and downloads open access full text articles from Wiley. + """ + # create directory to store files + directory = os.path.join(os.getcwd(), 'wiley_papers') + if not os.path.exists(directory): + os.makedirs(directory) + + api_key = os.environ.get("WILEY_TDM_TOKEN") + metadata = {} + + # get metadata from Crossref + if doi: + # get article metadata + works = Works() + article_data = works.doi(doi) + print("Article license: ", article_data['license']) + + article_licenses = [] + + for item in article_data['license']: + article_licenses.append(item['URL']) + print("Licenses: ", article_licenses) + # check if the license is open access - variant of CC + for license in article_licenses: + if license in LICENSES: + print("License found: ", license) + if LICENSES[license] == "closed_access": + return "Article is not open access." + else: + metadata['license'] = LICENSES[license] + break + else: + return "License not found." + + metadata['doi'] = doi + metadata['title'] = article_data['title'][0] + metadata['journal'] = article_data['container-title'][0] + metadata['publisher'] = article_data['publisher'] + metadata['issn'] = article_data['ISSN'][0] + metadata['url'] = article_data['URL'] + + print("Metadata: ", metadata) + + # download PDF based on doi + base_url = "https://api.wiley.com/onlinelibrary/tdm/v1/articles/" + url = base_url + str(doi) + + print("URL: ", url) + + headers = { + 'Wiley-TDM-Client-Token': api_key, + 'Content-Type': 'application/json' + } + + response = requests.get(url, headers=headers) + if response.status_code != 200: + # exponential backoff logic + print("Error in accessing article link, retrying: ", response.text) + + return "Error in accessing article link: " + str(response.status_code) + " - " + response.text + + filename = str(doi).replace("/", "_") + with open(directory + "/" + filename + ".pdf", "wb") as f: # Open a file in binary write mode ("wb") + for chunk in response.iter_content(chunk_size=1024): # Download in chunks + f.write(chunk) + print("Downloaded: ", filename) + + # upload file to S3 bucket + + # prep payload for beam ingest + + return "success" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 600955d5..1139f506 100644 --- a/requirements.txt +++ b/requirements.txt @@ -40,6 +40,10 @@ supabase==2.0.2 posthog==3.1.0 sentry-sdk==1.39.1 +# Publications +crossrefapi + + # Not currently supporting coursera ingest # cs-dlp @ git+https://github.com/raffaem/cs-dlp.git@0.12.0b0 # previously called coursera-dl