Skip to content

Commit

Permalink
Improve CLI speed with lazy imports (#1319)
Browse files Browse the repository at this point in the history
  • Loading branch information
jgbradley1 authored Nov 16, 2024
1 parent 9b4f24e commit 22a57d1
Show file tree
Hide file tree
Showing 237 changed files with 936 additions and 1,383 deletions.
4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20241025031711368197.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "patch",
"description": "move import statements out of init files"
}
4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20241031180003172666.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "patch",
"description": "fix autocompletion of existing files/directory paths."
}
6 changes: 3 additions & 3 deletions docs/prompt_tuning/auto_prompt_tuning.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ Before running auto tuning, ensure you have already initialized your workspace w
You can run the main script from the command line with various options:

```bash
graphrag prompt-tune [--root ROOT] [--domain DOMAIN] [--method METHOD] [--limit LIMIT] [--language LANGUAGE] \
graphrag prompt-tune [--root ROOT] [--config CONFIG] [--domain DOMAIN] [--selection-method METHOD] [--limit LIMIT] [--language LANGUAGE] \
[--max-tokens MAX_TOKENS] [--chunk-size CHUNK_SIZE] [--n-subset-max N_SUBSET_MAX] [--k K] \
[--min-examples-required MIN_EXAMPLES_REQUIRED] [--no-entity-types] [--output OUTPUT]
[--min-examples-required MIN_EXAMPLES_REQUIRED] [--discover-entity-types] [--output OUTPUT]
```

## Command-Line Options
Expand All @@ -49,7 +49,7 @@ graphrag prompt-tune [--root ROOT] [--domain DOMAIN] [--method METHOD] [--limit

- `--min-examples-required` (optional): The minimum number of examples required for entity extraction prompts. Default is 2.

- `--no-entity-types` (optional): Use untyped entity extraction generation. We recommend using this when your data covers a lot of topics or it is highly randomized.
- `--discover-entity-types` (optional): Allow the LLM to discover and extract entities automatically. We recommend using this when your data covers a lot of topics or is highly randomized.

- `--output` (optional): The folder to save the generated prompts. Default is "prompts".

Expand Down
2 changes: 1 addition & 1 deletion examples/custom_input/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import pandas as pd

from graphrag.index import run_pipeline_with_config
from graphrag.index.run import run_pipeline_with_config

pipeline_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "./pipeline.yml"
Expand Down
4 changes: 2 additions & 2 deletions examples/single_verb/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

import pandas as pd

from graphrag.index import run_pipeline, run_pipeline_with_config
from graphrag.index.config import PipelineWorkflowReference
from graphrag.index.config.workflow import PipelineWorkflowReference
from graphrag.index.run import run_pipeline, run_pipeline_with_config

# our fake dataset
dataset = pd.DataFrame([{"col1": 2, "col2": 4}, {"col1": 5, "col2": 10}])
Expand Down
7 changes: 4 additions & 3 deletions examples/use_built_in_workflows/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
import asyncio
import os

from graphrag.index import run_pipeline, run_pipeline_with_config
from graphrag.index.config import PipelineCSVInputConfig, PipelineWorkflowReference
from graphrag.index.input import load_input
from graphrag.index.config.input import PipelineCSVInputConfig
from graphrag.index.config.workflow import PipelineWorkflowReference
from graphrag.index.input.load_input import load_input
from graphrag.index.run import run_pipeline, run_pipeline_with_config

sample_data_dir = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "../_sample_data/"
Expand Down
2 changes: 1 addition & 1 deletion graphrag/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@

"""The GraphRAG package."""

from .cli.main import app
from graphrag.cli.main import app

app(prog_name="graphrag")
3 changes: 2 additions & 1 deletion graphrag/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@
"""

from graphrag.api.index import build_index
from graphrag.api.prompt_tune import DocSelectionType, generate_indexing_prompts
from graphrag.api.prompt_tune import generate_indexing_prompts
from graphrag.api.query import (
drift_search,
global_search,
global_search_streaming,
local_search,
local_search_streaming,
)
from graphrag.prompt_tune.types import DocSelectionType

__all__ = [ # noqa: RUF022
# index API
Expand Down
5 changes: 3 additions & 2 deletions graphrag/api/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@

from pathlib import Path

from graphrag.config import CacheType, GraphRagConfig
from graphrag.config.enums import CacheType
from graphrag.config.models.graph_rag_config import GraphRagConfig
from graphrag.index.cache.noop_pipeline_cache import NoopPipelineCache
from graphrag.index.create_pipeline_config import create_pipeline_config
from graphrag.index.emit.types import TableEmitterType
from graphrag.index.run import run_pipeline_with_config
from graphrag.index.typing import PipelineRunResult
from graphrag.logging import ProgressReporter
from graphrag.logging.base import ProgressReporter
from graphrag.vector_stores.factory import VectorStoreType


Expand Down
35 changes: 21 additions & 14 deletions graphrag/api/prompt_tune.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,32 @@
from pydantic import PositiveInt, validate_call

from graphrag.config.models.graph_rag_config import GraphRagConfig
from graphrag.index.llm import load_llm
from graphrag.logging import PrintProgressReporter
from graphrag.prompt_tune.generator import (
MAX_TOKEN_COUNT,
create_community_summarization_prompt,
create_entity_extraction_prompt,
create_entity_summarization_prompt,
detect_language,
from graphrag.index.llm.load_llm import load_llm
from graphrag.logging.print_progress import PrintProgressReporter
from graphrag.prompt_tune.defaults import MAX_TOKEN_COUNT
from graphrag.prompt_tune.generator.community_report_rating import (
generate_community_report_rating,
)
from graphrag.prompt_tune.generator.community_report_summarization import (
create_community_summarization_prompt,
)
from graphrag.prompt_tune.generator.community_reporter_role import (
generate_community_reporter_role,
generate_domain,
)
from graphrag.prompt_tune.generator.domain import generate_domain
from graphrag.prompt_tune.generator.entity_extraction_prompt import (
create_entity_extraction_prompt,
)
from graphrag.prompt_tune.generator.entity_relationship import (
generate_entity_relationship_examples,
generate_entity_types,
generate_persona,
)
from graphrag.prompt_tune.loader import (
MIN_CHUNK_SIZE,
load_docs_in_chunks,
from graphrag.prompt_tune.generator.entity_summarization_prompt import (
create_entity_summarization_prompt,
)
from graphrag.prompt_tune.generator.entity_types import generate_entity_types
from graphrag.prompt_tune.generator.language import detect_language
from graphrag.prompt_tune.generator.persona import generate_persona
from graphrag.prompt_tune.loader.input import MIN_CHUNK_SIZE, load_docs_in_chunks
from graphrag.prompt_tune.types import DocSelectionType


Expand Down
6 changes: 3 additions & 3 deletions graphrag/api/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@
import pandas as pd
from pydantic import validate_call

from graphrag.config import GraphRagConfig
from graphrag.config.models.graph_rag_config import GraphRagConfig
from graphrag.index.config.embeddings import (
community_full_content_embedding,
entity_description_embedding,
)
from graphrag.logging import PrintProgressReporter
from graphrag.logging.print_progress import PrintProgressReporter
from graphrag.query.factories import (
get_drift_search_engine,
get_global_search_engine,
Expand All @@ -47,8 +47,8 @@
from graphrag.query.structured_search.base import SearchResult # noqa: TCH001
from graphrag.utils.cli import redact
from graphrag.utils.embeddings import create_collection_name
from graphrag.vector_stores import VectorStoreFactory, VectorStoreType
from graphrag.vector_stores.base import BaseVectorStore
from graphrag.vector_stores.factory import VectorStoreFactory, VectorStoreType

reporter = PrintProgressReporter("")

Expand Down
11 changes: 5 additions & 6 deletions graphrag/callbacks/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,16 @@

from datashaper import WorkflowCallbacks

from graphrag.config import ReportingType
from graphrag.index.config import (
from graphrag.callbacks.blob_workflow_callbacks import BlobWorkflowCallbacks
from graphrag.callbacks.console_workflow_callbacks import ConsoleWorkflowCallbacks
from graphrag.callbacks.file_workflow_callbacks import FileWorkflowCallbacks
from graphrag.config.enums import ReportingType
from graphrag.index.config.reporting import (
PipelineBlobReportingConfig,
PipelineFileReportingConfig,
PipelineReportingConfig,
)

from .blob_workflow_callbacks import BlobWorkflowCallbacks
from .console_workflow_callbacks import ConsoleWorkflowCallbacks
from .file_workflow_callbacks import FileWorkflowCallbacks


def create_pipeline_reporter(
config: PipelineReportingConfig | None, root_dir: str | None
Expand Down
3 changes: 1 addition & 2 deletions graphrag/callbacks/global_search_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@

"""GlobalSearch LLM Callbacks."""

from graphrag.callbacks.llm_callbacks import BaseLLMCallback
from graphrag.query.structured_search.base import SearchResult

from .llm_callbacks import BaseLLMCallback


class GlobalSearchLLMCallback(BaseLLMCallback):
"""GlobalSearch LLM Callbacks."""
Expand Down
2 changes: 1 addition & 1 deletion graphrag/callbacks/progress_workflow_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from datashaper import ExecutionNode, NoopWorkflowCallbacks, Progress, TableContainer

from graphrag.logging import ProgressReporter
from graphrag.logging.base import ProgressReporter


class ProgressWorkflowCallbacks(NoopWorkflowCallbacks):
Expand Down
14 changes: 7 additions & 7 deletions graphrag/cli/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@
from pathlib import Path

import graphrag.api as api
from graphrag.config import (
CacheType,
enable_logging_with_config,
load_config,
resolve_paths,
)
from graphrag.config.enums import CacheType
from graphrag.config.load_config import load_config
from graphrag.config.logging import enable_logging_with_config
from graphrag.config.resolve_path import resolve_paths
from graphrag.index.emit.types import TableEmitterType
from graphrag.index.validate_config import validate_config_names
from graphrag.logging import ProgressReporter, ReporterType, create_progress_reporter
from graphrag.logging.base import ProgressReporter
from graphrag.logging.factories import create_progress_reporter
from graphrag.logging.types import ReporterType
from graphrag.utils.cli import redact

# Ignore warnings from numba
Expand Down
3 changes: 2 additions & 1 deletion graphrag/cli/initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from pathlib import Path

from graphrag.config.init_content import INIT_DOTENV, INIT_YAML
from graphrag.logging import ReporterType, create_progress_reporter
from graphrag.logging.factories import create_progress_reporter
from graphrag.logging.types import ReporterType
from graphrag.prompts.index.claim_extraction import CLAIM_EXTRACTION_PROMPT
from graphrag.prompts.index.community_report import (
COMMUNITY_REPORT_PROMPT,
Expand Down
Loading

0 comments on commit 22a57d1

Please sign in to comment.