FEAT: Bulk Copy Options #106

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed

jahnvi480 wants to merge 6 commits into main from jahnvi/bulk_copy_options

mssql_python/__init__.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -4,6 +4,9 @@ @@
     This module initializes the mssql_python package.
     """
+    # Import for pooling functionality
+    from .pooling import PoolingManager
     # Exceptions
     # https://www.python.org/dev/peps/pep-0249/#exceptions
     from .exceptions import (
@@ Expand Down Expand Up / @@ -45,15 +48,81 @@ @@
     from .logging_config import setup_logging, get_logger
     # Constants
-    from .constants import ConstantsDDBC
+    from .constants import ConstantsDDBC, BCPControlOptions, BCPDataTypes
+    # BCP
+    from .bcp_options import BCPOptions, ColumnFormat
     # GLOBALS
-    # Read-Only
+    # Read-Only - PEP-249 mandates these names
     apilevel = "2.0"
     paramstyle = "qmark"
     threadsafety = 1
-    from .pooling import PoolingManager
+    # Read-only module-level aliases for easier access to the BCP data type constants
+    # Character/string types
+    SQLTEXT = BCPDataTypes.SQLTEXT.value
+    SQLVARCHAR = BCPDataTypes.SQLVARCHAR.value
+    SQLCHARACTER = BCPDataTypes.SQLCHARACTER.value
+    SQLBIGCHAR = BCPDataTypes.SQLBIGCHAR.value
+    SQLBIGVARCHAR = BCPDataTypes.SQLBIGVARCHAR.value
+    SQLNCHAR = BCPDataTypes.SQLNCHAR.value
+    SQLNVARCHAR = BCPDataTypes.SQLNVARCHAR.value
+    SQLNTEXT = BCPDataTypes.SQLNTEXT.value
+    # Binary types
+    SQLBINARY = BCPDataTypes.SQLBINARY.value
+    SQLVARBINARY = BCPDataTypes.SQLVARBINARY.value
+    SQLBIGBINARY = BCPDataTypes.SQLBIGBINARY.value
+    SQLBIGVARBINARY = BCPDataTypes.SQLBIGVARBINARY.value
+    SQLIMAGE = BCPDataTypes.SQLIMAGE.value
+    # Integer types
+    SQLBIT = BCPDataTypes.SQLBIT.value
+    SQLBITN = BCPDataTypes.SQLBITN.value
+    SQLINT1 = BCPDataTypes.SQLINT1.value
+    SQLINT2 = BCPDataTypes.SQLINT2.value
+    SQLINT4 = BCPDataTypes.SQLINT4.value
+    SQLINT8 = BCPDataTypes.SQLINT8.value
+    SQLINTN = BCPDataTypes.SQLINTN.value
+    # Floating point types
+    SQLFLT4 = BCPDataTypes.SQLFLT4.value
+    SQLFLT8 = BCPDataTypes.SQLFLT8.value
+    SQLFLTN = BCPDataTypes.SQLFLTN.value
+    # Decimal/numeric types
+    SQLDECIMAL = BCPDataTypes.SQLDECIMAL.value
+    SQLNUMERIC = BCPDataTypes.SQLNUMERIC.value
+    SQLDECIMALN = BCPDataTypes.SQLDECIMALN.value
+    SQLNUMERICN = BCPDataTypes.SQLNUMERICN.value
+    # Money types
+    SQLMONEY = BCPDataTypes.SQLMONEY.value
+    SQLMONEY4 = BCPDataTypes.SQLMONEY4.value
+    SQLMONEYN = BCPDataTypes.SQLMONEYN.value
+    # Date/time types
+    SQLDATETIME = BCPDataTypes.SQLDATETIME.value
+    SQLDATETIM4 = BCPDataTypes.SQLDATETIM4.value
+    SQLDATETIMN = BCPDataTypes.SQLDATETIMN.value
+    SQLDATEN = BCPDataTypes.SQLDATEN.value
+    SQLTIMEN = BCPDataTypes.SQLTIMEN.value
+    SQLDATETIME2N = BCPDataTypes.SQLDATETIME2N.value
+    SQLDATETIMEOFFSETN = BCPDataTypes.SQLDATETIMEOFFSETN.value
+    # Special types
+    SQLUNIQUEID = BCPDataTypes.SQLUNIQUEID.value
+    SQLVARIANT = BCPDataTypes.SQLVARIANT.value
+    SQLUDT = BCPDataTypes.SQLUDT.value
+    SQLXML = BCPDataTypes.SQLXML.value
+    SQLTABLE = BCPDataTypes.SQLTABLE.value
+    # BCP special values
+    SQL_VARLEN_DATA = BCPDataTypes.SQL_VARLEN_DATA.value
+    SQL_NULL_DATA = BCPDataTypes.SQL_NULL_DATA.value
     def pooling(max_size=100, idle_timeout=600, enabled=True):
     #     """
     #     Enable connection pooling with the specified parameters.
@@ Expand Down @@

mssql_python/bcp_options.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -1,34 +1,84 @@
  
    """

    Copyright (c) Microsoft Corporation.

    Licensed under the MIT license.

    Provides classes for configuring SQL Server Bulk Copy Program (BCP) operations.

    This module defines the core classes needed for BCP functionality:

    - BindData: Represents data bindings for in-memory BCP operations

    - ColumnFormat: Defines column formatting for BCP operations

    - BCPOptions: Configures the overall BCP operation settings

    """

    from dataclasses import dataclass, field

    from typing import List, Optional, Literal

    from typing import List, Optional, Union, Any

    # Allowed values for the BCP direction and bulk (file) mode options

    ALLOWED_DIRECTIONS = ("in", "out", "queryout")

    ALLOWED_FILE_MODES = ("native", "char", "unicode")

    @dataclass
    class BindData:
        """
        Binding of one program variable to a table column for a bulk copy.

        Intended for use with the bcp_bind API.

        Attributes:
            data: The value to copy; primitive types or bytes.
            indicator_length: Size of the length indicator in bytes. Only
                0, 1, 2, 4 and 8 are accepted.
            data_length: Number of data bytes in the variable
                (SQL_VARLEN_DATA / SQL_NULL_DATA may be used here).
            terminator: Byte pattern that marks the end of the variable, or
                None when there is no terminator.
            terminator_length: Number of bytes in the terminator.
            data_type: C data type of the variable, expressed as a SQL Server
                type token.
            server_col: 1-based ordinal of the target column in the database
                table. The declared default of 0 does not pass validation, so
                a positive value has to be supplied explicitly.

        Raises:
            ValueError: If indicator_length is not an accepted size or
                server_col is not positive.
            TypeError: If terminator is neither bytes nor None.
        """

        data: Any = None
        indicator_length: int = 0
        data_length: int = 0  # SQL_VARLEN_DATA or SQL_NULL_DATA are also valid here
        terminator: Optional[bytes] = None
        terminator_length: int = 0
        data_type: int = 0  # SQL Server data type token
        server_col: int = 0  # 1-based column ordinal in the table

        def __post_init__(self):
            """Validate the binding right after the dataclass initialiser runs."""
            accepted_indicator_sizes = (0, 1, 2, 4, 8)
            indicator_ok = self.indicator_length in accepted_indicator_sizes
            if not indicator_ok:
                raise ValueError("indicator_length must be 0, 1, 2, 4, or 8.")

            # Column ordinals are 1-based, so zero and negatives are rejected.
            if self.server_col <= 0:
                raise ValueError("server_col must be a positive integer (1-based).")

            terminator_ok = self.terminator is None or isinstance(
                self.terminator, bytes
            )
            if not terminator_ok:
                raise TypeError("terminator must be bytes or None.")

    @dataclass

    class ColumnFormat:

        """

        Represents the format of a column in a bulk copy operation.

        Attributes:

            prefix_len (int): Option: (format_file) or (prefix_len, data_len).

            prefix_len: Option: (format_file) or (prefix_len, data_len).

                The length of the prefix for fixed-length data types. Must be non-negative.

            data_len (int): Option: (format_file) or (prefix_len, data_len).

            data_len: Option: (format_file) or (prefix_len, data_len).

                The length of the data. Must be non-negative.

            field_terminator (Optional[bytes]): Option: (-t). The field terminator string.

            field_terminator: Option: (-t). The field terminator string.

                e.g., b',' for comma-separated values.

            row_terminator (Optional[bytes]): Option: (-r). The row terminator string.

            row_terminator: Option: (-r). The row terminator string.

                e.g., b'\\n' for newline-terminated rows.

            server_col (int): Option: (format_file) or (server_col). The 1-based column number

            server_col: Option: (format_file) or (server_col). The 1-based column number

                in the SQL Server table. Defaults to 1, representing the first column.

                Must be a positive integer.

            file_col (int): Option: (format_file) or (file_col). The 1-based column number

            file_col: Option: (format_file) or (file_col). The 1-based column number

                in the data file. Defaults to 1, representing the first column.

                Must be a positive integer.

        """

        prefix_len: int

        data_len: int

        file_col: int = 1

        user_data_type: int = 0

        prefix_len: int = 0

        data_len: int = 0

        field_terminator: Optional[bytes] = None

        row_terminator: Optional[bytes] = None

        terminator_len: int = 0

        server_col: int = 1

        file_col: int = 1

        def __post_init__(self):

            if self.prefix_len < 0:

    @@ -43,79 +93,135 @@ def __post_init__(self):
  
                self.field_terminator, bytes

            ):

                raise TypeError("field_terminator must be bytes or None.")

            if self.row_terminator is not None and not isinstance(

                self.row_terminator, bytes

            ):

                raise TypeError("row_terminator must be bytes or None.")

    @dataclass

    class BCPOptions:

        """

        Represents the options for a bulk copy operation.

        Attributes:

            direction (Literal[str]): 'in' or 'out'. Option: (-i or -o).

            data_file (str): The data file. Option: (positional argument).

            error_file (Optional[str]): The error file. Option: (-e).

            format_file (Optional[str]): The format file to use for 'in'/'out'. Option: (-f).

            batch_size (Optional[int]): The batch size. Option: (-b).

            max_errors (Optional[int]): The maximum number of errors allowed. Option: (-m).

            first_row (Optional[int]): The first row to process. Option: (-F).

            last_row (Optional[int]): The last row to process. Option: (-L).

            code_page (Optional[str]): The code page. Option: (-C).

            keep_identity (bool): Keep identity values. Option: (-E).

            keep_nulls (bool): Keep null values. Option: (-k).

            hints (Optional[str]): Additional hints. Option: (-h).

            bulk_mode (str): Bulk mode ('native', 'char', 'unicode'). Option: (-n, -c, -w).

            direction: 'in' or 'out'. Option: (-i or -o).

            data_file: The data file. Option: (positional argument).

            error_file: The error file. Option: (-e).

            format_file: The format file to use for 'in'/'out'. Option: (-f).

            batch_size: The batch size. Option: (-b).

            max_errors: The maximum number of errors allowed. Option: (-m).

            first_row: The first row to process. Option: (-F).

            last_row: The last row to process. Option: (-L).

            code_page: The code page. Option: (-C).

            keep_identity: Keep identity values. Option: (-E).

            keep_nulls: Keep null values. Option: (-k).

            hints: Additional hints. Option: (-h).

            bulk_mode: Bulk mode ('native', 'char', 'unicode'). Option: (-n, -c, -w).

                Defaults to "native".

            columns (List[ColumnFormat]): Column formats.

            columns: Column formats.

            bind_data: Data bindings for in-memory BCP.

        """

        direction: Literal["in", "out"]

        data_file: str  # data_file is mandatory for 'in' and 'out'

        direction: str

        data_file: Optional[str] = None  # data_file is mandatory for 'in' and 'out'

        error_file: Optional[str] = None

        format_file: Optional[str] = None

        # write_format_file is removed as 'format' direction is not actively supported

        query: Optional[str] = None  # For 'query' direction

        bulk_mode: Optional[str] = "native"  # Default to 'native' mode

        batch_size: Optional[int] = None

        max_errors: Optional[int] = None

        first_row: Optional[int] = None

        last_row: Optional[int] = None

        code_page: Optional[str] = None

        code_page: Optional[Union[int, str]] = None

        hints: Optional[str] = None

        columns: Optional[List[ColumnFormat]] = field(default_factory=list)

        bind_data: Union[List[BindData], List[List[BindData]]] = field(

            default_factory=list

        )  # New field for bind data

        row_terminator: Optional[bytes] = None

        keep_identity: bool = False

        keep_nulls: bool = False

        hints: Optional[str] = None

        bulk_mode: Literal["native", "char", "unicode"] = "native"

        columns: List[ColumnFormat] = field(default_factory=list)

        use_memory_bcp: bool = False  # Flag for in-memory BCP (bind and sendrow)

        def __post_init__(self):

            if self.direction not in ["in", "out"]:

                raise ValueError("direction must be 'in' or 'out'.")

            if not self.data_file:

                raise ValueError("data_file must be provided and non-empty for 'in' or 'out' directions.")

            if self.error_file is None or not self.error_file:  # Making error_file mandatory for in/out

                raise ValueError("error_file must be provided and non-empty for 'in' or 'out' directions.")

            if self.format_file is not None and not self.format_file:

                raise ValueError("format_file, if provided, must not be an empty string.")

            if self.batch_size is not None and self.batch_size <= 0:

                raise ValueError("batch_size must be a positive integer.")

            if self.max_errors is not None and self.max_errors < 0:

                raise ValueError("max_errors must be a non-negative integer.")

            if self.first_row is not None and self.first_row <= 0:

                raise ValueError("first_row must be a positive integer.")

            if self.last_row is not None and self.last_row <= 0:

                raise ValueError("last_row must be a positive integer.")

            if self.last_row is not None and self.first_row is None:

                raise ValueError("first_row must be specified if last_row is specified.")

            if not self.direction:

                raise ValueError("BCPOptions.direction is a required field.")

            if self.bind_data and not self.use_memory_bcp:

                self.use_memory_bcp = True  # Automatically set if bind_data is provided

            if self.use_memory_bcp and not self.bind_data:

                raise ValueError(

                    "BCPOptions.bind_data must be provided when use_memory_bcp is True."

                )

            if self.direction not in ALLOWED_DIRECTIONS:

                raise ValueError(

                    f"BCPOptions.direction '{self.direction}' is invalid. "

                    f"Allowed directions are: {', '.join(ALLOWED_DIRECTIONS)}."

                )

            # Add this validation for in-memory BCP requiring 'in' direction

            if self.use_memory_bcp and self.direction != "in":

                raise ValueError("in-memory BCP operations require direction='in'")

            # Handle in-memory BCP case separately

            if self.use_memory_bcp:

                if not self.bind_data:

                    raise ValueError(

                        "BCPOptions.bind_data must be provided when use_memory_bcp is True."

                    )

                # For in-memory BCP, data_file is not needed, but error_file is still useful

                if not self.error_file:

                    raise ValueError(

                        "error_file must be provided even for in-memory BCP operations."

                    )

            else:

                # Regular file-based BCP validation

                if self.direction in ["in", "out"]:

                    if not self.data_file:

                        raise ValueError(

                            f"BCPOptions.data_file is required for file-based BCP "

                            f"direction '{self.direction}'."

                        )

                    if not self.error_file:

                        raise ValueError(

                            "error_file must be provided for file-based BCP operations."

                        )

            if self.direction == "queryout" and not self.query:

                raise ValueError("BCPOptions.query is required for BCP direction 'query'.")

            if self.columns and self.format_file:

                raise ValueError(

                    "Cannot specify both 'columns' (for bcp_colfmt) and 'format_file' "

                    "(for bcp_readfmt). Choose one."

                )

            if isinstance(self.code_page, int) and self.code_page < 0:

                raise ValueError(

                    "BCPOptions.code_page, if an integer, must be non-negative."

                )

            if self.bulk_mode not in ALLOWED_FILE_MODES:

                raise ValueError(

                    f"BCPOptions.bulk_mode '{self.bulk_mode}' is invalid. "

                    f"Allowed modes are: {', '.join(ALLOWED_FILE_MODES)}."

                )

            for attr_name in ["batch_size", "max_errors", "first_row", "last_row"]:

                attr_value = getattr(self, attr_name)

                if attr_value is not None and attr_value < 0:

                    raise ValueError(

                        f"BCPOptions.{attr_name} must be non-negative if specified. "

                        f"Got {attr_value}"

                    )

            if (

                self.first_row is not None

                and self.last_row is not None

                and self.last_row < self.first_row

                and self.first_row > self.last_row

            ):

                raise ValueError("last_row must be greater than or equal to first_row.")

            if self.code_page is not None and not self.code_page:

                raise ValueError("code_page, if provided, must not be an empty string.")

            if self.hints is not None and not self.hints:

                raise ValueError("hints, if provided, must not be an empty string.")

            if self.bulk_mode not in ["native", "char", "unicode"]:

                raise ValueError("bulk_mode must be 'native', 'char', or 'unicode'.")

                raise ValueError(

                    "BCPOptions.first_row cannot be greater than BCPOptions.last_row."

                )

            if self.row_terminator is not None and not isinstance(

                self.row_terminator, bytes

            ):

                raise TypeError("row_terminator must be bytes or None.")

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FEAT: Bulk Copy Options #106

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!