diff --git a/.gitignore b/.gitignore index a95bce69..00facf06 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ Lib/* Scripts/* .idea/* test.json +out.json diff --git a/src/somef/cli.py b/src/somef/cli.py index e123fe0d..e013eba7 100644 --- a/src/somef/cli.py +++ b/src/somef/cli.py @@ -347,7 +347,8 @@ def get_path(obj, path): print(dockerfiles) if len(notebooks) > 0: - filtered_resp["hasExecutableNotebook"] = [convert_to_raw_usercontent(x, owner, repo_name, repo_ref) for x in notebooks] + filtered_resp["hasExecutableNotebook"] = [convert_to_raw_usercontent(x, owner, repo_name, repo_ref) for x in + notebooks] if len(dockerfiles) > 0: filtered_resp["hasBuildFile"] = [convert_to_raw_usercontent(x, owner, repo_name, repo_ref) for x in dockerfiles] if len(docs) > 0: @@ -369,6 +370,7 @@ def get_path(obj, path): def convert_to_raw_usercontent(partial, owner, repo_name, repo_ref): return f"https://raw.githubusercontent.com/{owner}/{repo_name}/{repo_ref}/{urllib.parse.quote(partial)}" + ## Function takes readme text as input and divides it into excerpts ## Returns the extracted excerpts def create_excerpts(string_list): @@ -595,16 +597,19 @@ def merge(header_predictions, predictions, citations, dois, binder_links, long_t def format_output(git_data, repo_data): print("formatting output") for i in git_data.keys(): + # print(i) + # print(git_data[i]) if i == 'description': if 'description' not in repo_data.keys(): repo_data['description'] = [] - repo_data['description'].append({'excerpt': git_data[i], 'confidence': [1.0], 'technique': 'GitHub API'}) + if git_data[i] != "": + repo_data['description'].append( + {'excerpt': git_data[i], 'confidence': [1.0], 'technique': 'GitHub API'}) else: if i == 'hasExecutableNotebook' or i == 'hasBuildFile' or i == 'hasDocumentation': repo_data[i] = {'excerpt': git_data[i], 'confidence': [1.0], 'technique': 'File Exploration'} - else: + elif git_data[i] != "" and git_data[i] != []: repo_data[i] = {'excerpt': git_data[i], 'confidence': [1.0], 'technique': 'GitHub API'} - return repo_data @@ -670,13 +675,18 @@ def average_confidence(x): reverse=True) descriptions_text = [x["excerpt"] for x in descriptions] + published_date = "" + try: + published_date = format_date(release_path(["datePublished"])) + except: + print("Published date is not available") + codemeta_output = { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "@type": "SoftwareSourceCode", "license": data_path(["license", "excerpt", "url"]), "codeRepository": "git+" + code_repository + ".git", "dateCreated": format_date(data_path(["dateCreated", "excerpt"])), - "datePublished": format_date(release_path(["datePublished"])), "dateModified": format_date(data_path(["dateModified", "excerpt"])), "downloadUrl": data_path(["downloadUrl", "excerpt"]), "issueTracker": code_repository + "/issues", @@ -694,6 +704,8 @@ def average_confidence(x): } ] } + if published_date != "": + codemeta_output["datePublished"] = published_date pruned_output = {}