Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions osa_tool/config/settings/arguments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,16 @@ convert_notebooks:
Provide one or multiple paths, or leave empty for repo directory.
example: path/to/file1, path/to/file2

translate_readme:
aliases: [ "--translate-readme" ]
type: list
description: |
List of target languages to translate the project's main README into.
Each language should be specified by its name (e.g., "Russian", "Chinese").
The translated README files will be saved separately in the repository folder
with language-specific suffixes (e.g., README_ru.md, README_zh.md).
example: Russian, Chinese

delete_dir:
aliases: [ "--delete-dir" ]
type: flag
Expand Down
1 change: 1 addition & 0 deletions osa_tool/config/settings/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ web_mode = false
# branch = ""
# article = ""
translate_dirs = false
translate_readme = []
# convert_notebooks = []
delete_dir = false
# ensure_license = ""
Expand Down
34 changes: 34 additions & 0 deletions osa_tool/config/settings/prompts.toml
Original file line number Diff line number Diff line change
Expand Up @@ -258,4 +258,38 @@ SPECIAL RULES:
- All content before the first meaningful markdown header (starting with `## `) in the original README should go to the `badges` section **only if it contains the project title, logos, or status badges**.
- Textual descriptions, images, or code examples before the first `## ` header should be evaluated and placed into the appropriate sections (like "Overview", "Usage", or "Installation").
- Avoid placing unrelated text or images in `badges`.
"""

translate = """
TASK:
Translate the provided README content into the target language.

RULES:

- Translate only natural language parts (titles, descriptions, paragraphs, lists).
- DO NOT translate:
* project name,
* code blocks,
* shell commands,
* configuration snippets,
* links, badges, and image references.
- Preserve original Markdown formatting.
- Return only valid JSON, no explanations.
- The "suffix" must be the correct ISO 639-1 or common abbreviation of the target language.

OUTPUT FORMAT:
Return a JSON object with the following structure:

{{
"content": "translated README text",
"suffix": "short language code (e.g., en, ru, es, fr, de)"
}}

INPUT DATA:

1. TARGET LANGUAGE: {target_language}

2. README CONTENT:

{readme_content}
"""
12 changes: 12 additions & 0 deletions osa_tool/readmegen/prompts/prompts_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,18 @@ def get_prompt_algorithms_article(self, key_files: list[FileContext], pdf_summar
logger.error(f"Failed to build algorithms prompt: {e}")
raise

def get_prompt_translate_readme(self, readme_content: str, target_language: str) -> str:
    """
    Fill the "translate" prompt template with the README text and the target language.

    Args:
        readme_content: README text to embed into the prompt.
        target_language: Name of the language the README should be translated into.

    Returns:
        str: The formatted prompt.

    Raises:
        Exception: Any error raised while looking up or formatting the template
            is logged and then re-raised unchanged.
    """
    try:
        template = self.prompts["translate"]
        return template.format(readme_content=readme_content, target_language=target_language)
    except Exception as e:
        logger.error(f"Failed to build readme translation prompt: {e}")
        raise

@staticmethod
def serialize_file_contexts(files: list[FileContext]) -> str:
"""
Expand Down
6 changes: 6 additions & 0 deletions osa_tool/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
update_workflow_config,
)
from osa_tool.translation.dir_translator import DirectoryTranslator
from osa_tool.translation.readme_translator import ReadmeTranslator
from osa_tool.utils import (
delete_repository,
logger,
Expand Down Expand Up @@ -136,6 +137,11 @@ def main():
rich_section("README generation")
readme_agent(config, plan.get("article"), plan.get("refine_readme"))

# Readme translation
if plan.get("translate_readme"):
rich_section("README translation")
ReadmeTranslator(config, plan.get("translate_readme")).translate_readme()

# About section generation
about_gen = None
if plan.get("about"):
Expand Down
132 changes: 132 additions & 0 deletions osa_tool/translation/readme_translator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import asyncio
import json
import os
import shutil

from osa_tool.config.settings import ConfigLoader
from osa_tool.models.models import ModelHandlerFactory, ModelHandler
from osa_tool.readmegen.postprocessor.response_cleaner import process_text
from osa_tool.readmegen.prompts.prompts_builder import PromptBuilder
from osa_tool.readmegen.utils import read_file, save_sections, remove_extra_blank_lines
from osa_tool.utils import parse_folder_name, logger


class ReadmeTranslator:
def __init__(self, config_loader: ConfigLoader, languages: list[str]):
    """
    Store the configuration, the requested languages and the derived repository path.

    Args:
        config_loader: Loader whose ``config`` attribute holds the tool settings.
        languages: Target languages the README should be translated into.
    """
    self.config_loader = config_loader
    settings = config_loader.config
    self.config = settings
    self.rate_limit = settings.llm.rate_limit
    self.languages = languages
    repository_url = settings.git.repository
    self.repo_url = repository_url
    self.model_handler: ModelHandler = ModelHandlerFactory.build(settings)
    # The repository folder is looked up under the current working directory.
    folder_name = parse_folder_name(repository_url)
    self.base_path = os.path.join(os.getcwd(), folder_name)

async def translate_readme_request_async(
    self, readme_content: str, target_language: str, semaphore: asyncio.Semaphore
) -> dict:
    """
    Ask the LLM to translate the README and normalize its reply into a dictionary.

    Args:
        readme_content: README text to translate.
        target_language: Name of the language to translate into.
        semaphore: Bounds the number of LLM requests that run at the same time.

    Returns:
        dict: The parsed reply (or a fallback built from the raw text) with a
            filename-safe "suffix" and the "target_language" key added.
    """

    def _safe_code(value) -> str:
        # The suffix ends up inside a file name, so keep only characters that
        # cannot form a path separator or a parent-directory reference.
        return "".join(ch for ch in str(value or "") if ch.isalnum() or ch in "-_")

    prompt = PromptBuilder(self.config_loader).get_prompt_translate_readme(readme_content, target_language)
    async with semaphore:
        response = await self.model_handler.async_request(prompt)
    response = process_text(response)

    fallback_suffix = _safe_code(target_language)[:2].lower()

    parsed = None
    try:
        parsed = json.loads(response)
    except json.JSONDecodeError:
        logger.warning(f"LLM response for '{target_language}' is not valid JSON, applying fallback")
    else:
        # Valid JSON that is not an object (a bare string, list, null, ...) cannot
        # carry "content"/"suffix" keys, so it is treated like an unparsable reply.
        if not isinstance(parsed, dict):
            logger.warning(f"LLM response for '{target_language}' is not a JSON object, applying fallback")
            parsed = None

    if parsed is None:
        result = {"content": response.strip(), "suffix": fallback_suffix}
    else:
        result = parsed

    # A missing or unusable suffix falls back to the language-name prefix so the
    # file written for this translation can still be located afterwards.
    result["suffix"] = _safe_code(result.get("suffix")) or fallback_suffix or "unknown"
    result["target_language"] = target_language
    return result
Comment on lines 24 to 42
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

А нет смысла всю эту асинхронную логику куда-то отдельно вынести и при необходимости переиспользовать? Она же вроде не специфична именно для translate-функциональности?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Да, в будущем надо будет перенести. Скорее всего в модуль с генерацией readme


async def translate_readme_async(self) -> None:
    """
    Asynchronously translate the main README into all target languages.

    Every language is translated and saved in its own task. A failure for one
    language is logged and does not stop the remaining translations. After all
    tasks finish, the translation for the first requested language (if it
    succeeded) is passed to ``set_default_translated_readme``.
    """
    readme_content = self.get_main_readme_file()
    if not readme_content:
        logger.warning("No README content found, skipping translation")
        return

    semaphore = asyncio.Semaphore(self.rate_limit)

    # Successful translations keyed by the requested language name.
    results = {}

    async def translate_and_save(lang: str) -> None:
        try:
            translation = await self.translate_readme_request_async(readme_content, lang, semaphore)
            self.save_translated_readme(translation)
        except Exception as e:
            # Task boundary: keep one broken language from aborting the others.
            logger.error(f"Failed to translate README into '{lang}': {e}")
            return
        results[lang] = translation

    await asyncio.gather(*(translate_and_save(lang) for lang in self.languages))

    if self.languages:
        first_lang = self.languages[0]
        if first_lang in results:
            self.set_default_translated_readme(results[first_lang])
        else:
            logger.warning(f"No translation found for first language '{first_lang}'")

def save_translated_readme(self, translation: dict) -> None:
    """
    Write one translated README into the repository folder.

    Args:
        translation (dict): Mapping that may contain:
            - "content": translated README text (nothing is written when empty)
            - "suffix": language code used in the file name ("unknown" when absent)
    """
    lang_code = translation.get("suffix", "unknown")
    text = translation.get("content", "")

    if text:
        destination = os.path.join(self.base_path, f"README_{lang_code}.md")
        save_sections(text, destination)
        remove_extra_blank_lines(destination)
        logger.info(f"Saved translated README: {destination}")
    else:
        logger.warning(f"Translation for '{lang_code}' is empty, skipping save.")

def set_default_translated_readme(self, translation: dict) -> None:
    """
    Create a .github/README.md symlink (or copy fallback)
    pointing to the first translated README.

    Args:
        translation (dict): Translation result; its "suffix" selects the
            ``README_<suffix>.md`` file in the repository folder.

    Nothing is done when the suffix is missing or the translated file does not exist.
    """
    suffix = translation.get("suffix")
    if not suffix:
        logger.warning("No suffix for first translated README, skipping default setup.")
        return

    source_path = os.path.join(self.base_path, f"README_{suffix}.md")
    if not os.path.exists(source_path):
        logger.warning(f"Translated README not found at {source_path}, skipping setup.")
        return

    github_dir = os.path.join(self.base_path, ".github")
    os.makedirs(github_dir, exist_ok=True)

    target_path = os.path.join(github_dir, "README.md")

    try:
        # lexists() also reports a dangling symlink, which exists() would miss;
        # leaving one in place makes symlink() fail and the copy fallback write
        # through the stale link.
        if os.path.lexists(target_path):
            os.remove(target_path)

        # Link relative to the .github directory so the link stays valid when the
        # repository is moved or cloned to a different location.
        link_target = os.path.relpath(source_path, github_dir)
        os.symlink(link_target, target_path)
        logger.info(f"Created symlink: {target_path} -> {source_path}")
    except (OSError, NotImplementedError) as e:
        logger.warning(f"Symlink not supported ({e}), copying file instead")
        shutil.copyfile(source_path, target_path)
        logger.info(f"Copied file: {target_path}")

def get_main_readme_file(self) -> str:
    """
    Read README.md from the root of the repository folder.

    Returns:
        str: Whatever ``read_file`` yields for that path. The caller treats a
            falsy result as "no README" — presumably an empty string when the
            file is missing; confirm against ``read_file``.
    """
    return read_file(os.path.join(self.base_path, "README.md"))

def translate_readme(self) -> None:
    """Run the asynchronous README translation to completion from synchronous code."""
    pending = self.translate_readme_async()
    asyncio.run(pending)