Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 72 additions & 3 deletions mssql_python/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
This module initializes the mssql_python package.
"""

# Import for pooling functionality
from .pooling import PoolingManager

# Exceptions
# https://www.python.org/dev/peps/pep-0249/#exceptions
from .exceptions import (
Expand Down Expand Up @@ -45,15 +48,81 @@
from .logging_config import setup_logging, get_logger

# Constants
from .constants import ConstantsDDBC
from .constants import ConstantsDDBC, BCPControlOptions, BCPDataTypes

# BCP
from .bcp_options import BCPOptions, ColumnFormat

# GLOBALS
# Read-only module attributes; PEP 249 (DB-API 2.0) mandates these names.
# DB-API specification level implemented by this module.
apilevel = "2.0"
# Parameter marker style: question marks, e.g. "... WHERE name = ?".
paramstyle = "qmark"
# PEP 249 level 1: threads may share the module, but not connections.
threadsafety = 1

from .pooling import PoolingManager
# Module-level aliases for the BCP data type tokens. Each name below is bound
# to the `.value` of the BCPDataTypes member with the same name, so callers can
# write `mssql_python.SQLINT4` instead of `BCPDataTypes.SQLINT4.value`.
# Treat these as read-only constants.
# Character/string types
SQLTEXT = BCPDataTypes.SQLTEXT.value
SQLVARCHAR = BCPDataTypes.SQLVARCHAR.value
SQLCHARACTER = BCPDataTypes.SQLCHARACTER.value
SQLBIGCHAR = BCPDataTypes.SQLBIGCHAR.value
SQLBIGVARCHAR = BCPDataTypes.SQLBIGVARCHAR.value
SQLNCHAR = BCPDataTypes.SQLNCHAR.value
SQLNVARCHAR = BCPDataTypes.SQLNVARCHAR.value
SQLNTEXT = BCPDataTypes.SQLNTEXT.value

# Binary types
SQLBINARY = BCPDataTypes.SQLBINARY.value
SQLVARBINARY = BCPDataTypes.SQLVARBINARY.value
SQLBIGBINARY = BCPDataTypes.SQLBIGBINARY.value
SQLBIGVARBINARY = BCPDataTypes.SQLBIGVARBINARY.value
SQLIMAGE = BCPDataTypes.SQLIMAGE.value

# Integer types
SQLBIT = BCPDataTypes.SQLBIT.value
SQLBITN = BCPDataTypes.SQLBITN.value
SQLINT1 = BCPDataTypes.SQLINT1.value
SQLINT2 = BCPDataTypes.SQLINT2.value
SQLINT4 = BCPDataTypes.SQLINT4.value
SQLINT8 = BCPDataTypes.SQLINT8.value
SQLINTN = BCPDataTypes.SQLINTN.value

# Floating point types
SQLFLT4 = BCPDataTypes.SQLFLT4.value
SQLFLT8 = BCPDataTypes.SQLFLT8.value
SQLFLTN = BCPDataTypes.SQLFLTN.value

# Decimal/numeric types
SQLDECIMAL = BCPDataTypes.SQLDECIMAL.value
SQLNUMERIC = BCPDataTypes.SQLNUMERIC.value
SQLDECIMALN = BCPDataTypes.SQLDECIMALN.value
SQLNUMERICN = BCPDataTypes.SQLNUMERICN.value

# Money types
SQLMONEY = BCPDataTypes.SQLMONEY.value
SQLMONEY4 = BCPDataTypes.SQLMONEY4.value
SQLMONEYN = BCPDataTypes.SQLMONEYN.value

# Date/time types
SQLDATETIME = BCPDataTypes.SQLDATETIME.value
SQLDATETIM4 = BCPDataTypes.SQLDATETIM4.value
SQLDATETIMN = BCPDataTypes.SQLDATETIMN.value
SQLDATEN = BCPDataTypes.SQLDATEN.value
SQLTIMEN = BCPDataTypes.SQLTIMEN.value
SQLDATETIME2N = BCPDataTypes.SQLDATETIME2N.value
SQLDATETIMEOFFSETN = BCPDataTypes.SQLDATETIMEOFFSETN.value

# Special types
SQLUNIQUEID = BCPDataTypes.SQLUNIQUEID.value
SQLVARIANT = BCPDataTypes.SQLVARIANT.value
SQLUDT = BCPDataTypes.SQLUDT.value
SQLXML = BCPDataTypes.SQLXML.value
SQLTABLE = BCPDataTypes.SQLTABLE.value

# BCP special values (the BindData docstring names these as possible
# data_length values)
SQL_VARLEN_DATA = BCPDataTypes.SQL_VARLEN_DATA.value
SQL_NULL_DATA = BCPDataTypes.SQL_NULL_DATA.value


def pooling(max_size=100, idle_timeout=600, enabled=True):
# """
# Enable connection pooling with the specified parameters.
Expand Down
232 changes: 169 additions & 63 deletions mssql_python/bcp_options.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,84 @@
"""
Copyright (c) Microsoft Corporation.
Licensed under the MIT license.
Provides classes for configuring SQL Server Bulk Copy Program (BCP) operations.

This module defines the core classes needed for BCP functionality:
- BindData: Represents data bindings for in-memory BCP operations
- ColumnFormat: Defines column formatting for BCP operations
- BCPOptions: Configures the overall BCP operation settings
"""

from dataclasses import dataclass, field
from typing import List, Optional, Literal
from typing import List, Optional, Union, Any

# Accepted values for BCPOptions.direction and BCPOptions.bulk_mode.
# BCPOptions.__post_init__ raises ValueError for any value outside these tuples.
ALLOWED_DIRECTIONS = ("in", "out", "queryout")
ALLOWED_FILE_MODES = ("native", "char", "unicode")


@dataclass
class BindData:
    """
    Represents the data binding for a column in a bulk copy operation.
    Used with bcp_bind API.

    Attributes:
        data: The data to be copied. Can be primitive types or bytes.
        indicator_length: Length of indicator in bytes (0, 1, 2, 4, or 8).
        data_length: Count of bytes of data in the variable
            (can be SQL_VARLEN_DATA/SQL_NULL_DATA, so negative values
            are not rejected here).
        terminator: Byte pattern marking the end of the variable, if any.
        terminator_length: Count of bytes in the terminator. Must be
            non-negative and no larger than len(terminator)
            (0 when terminator is None).
        data_type: The C data type of the variable (using SQL Server type tokens).
        server_col: Ordinal position of the column in the database table
            (1-based). The default of 0 is rejected by validation, so this
            must always be passed explicitly.

    Raises:
        ValueError: If indicator_length is not one of 0, 1, 2, 4, 8; if
            server_col is not positive; or if terminator_length is negative
            or exceeds the number of terminator bytes supplied.
        TypeError: If terminator is neither bytes nor None.
    """

    data: Any = None
    indicator_length: int = 0
    data_length: int = 0  # Can be SQL_VARLEN_DATA or SQL_NULL_DATA
    terminator: Optional[bytes] = None
    terminator_length: int = 0
    data_type: int = 0  # SQL Server data type tokens
    server_col: int = 0  # 1-based column number in table

    def __post_init__(self):
        """Validate the binding once the dataclass fields are populated."""
        if self.indicator_length not in (0, 1, 2, 4, 8):
            raise ValueError("indicator_length must be 0, 1, 2, 4, or 8.")
        if self.server_col <= 0:
            raise ValueError("server_col must be a positive integer (1-based).")
        if self.terminator is not None and not isinstance(self.terminator, bytes):
            raise TypeError("terminator must be bytes or None.")

        # terminator_length is a byte count. A negative count, or one larger
        # than the terminator actually supplied, cannot describe a real
        # terminator, so reject it here.
        # NOTE(review): presumably this value is forwarded to the native
        # bcp_bind call together with the terminator buffer -- confirm.
        if self.terminator_length < 0:
            raise ValueError("terminator_length must be non-negative.")
        available = 0 if self.terminator is None else len(self.terminator)
        if self.terminator_length > available:
            raise ValueError(
                "terminator_length must not exceed the length of terminator "
                "(0 when terminator is None)."
            )


@dataclass
class ColumnFormat:
"""
Represents the format of a column in a bulk copy operation.
Attributes:
prefix_len (int): Option: (format_file) or (prefix_len, data_len).
prefix_len: Option: (format_file) or (prefix_len, data_len).
The length of the prefix for fixed-length data types. Must be non-negative.
data_len (int): Option: (format_file) or (prefix_len, data_len).
data_len: Option: (format_file) or (prefix_len, data_len).
The length of the data. Must be non-negative.
field_terminator (Optional[bytes]): Option: (-t). The field terminator string.
field_terminator: Option: (-t). The field terminator string.
e.g., b',' for comma-separated values.
row_terminator (Optional[bytes]): Option: (-r). The row terminator string.
row_terminator: Option: (-r). The row terminator string.
e.g., b'\\n' for newline-terminated rows.
server_col (int): Option: (format_file) or (server_col). The 1-based column number
server_col: Option: (format_file) or (server_col). The 1-based column number
in the SQL Server table. Defaults to 1, representing the first column.
Must be a positive integer.
file_col (int): Option: (format_file) or (file_col). The 1-based column number
file_col: Option: (format_file) or (file_col). The 1-based column number
in the data file. Defaults to 1, representing the first column.
Must be a positive integer.
"""

prefix_len: int
data_len: int
file_col: int = 1
user_data_type: int = 0
prefix_len: int = 0
data_len: int = 0
field_terminator: Optional[bytes] = None
row_terminator: Optional[bytes] = None
terminator_len: int = 0
server_col: int = 1
file_col: int = 1

def __post_init__(self):
if self.prefix_len < 0:
Expand All @@ -43,79 +93,135 @@ def __post_init__(self):
self.field_terminator, bytes
):
raise TypeError("field_terminator must be bytes or None.")
if self.row_terminator is not None and not isinstance(
self.row_terminator, bytes
):
raise TypeError("row_terminator must be bytes or None.")


@dataclass
class BCPOptions:
    """
    Represents the options for a bulk copy operation.

    Attributes:
        direction: 'in', 'out' or 'queryout' (must be one of
            ALLOWED_DIRECTIONS). Option: (-i or -o).
        data_file: The data file. Required for file-based 'in' and 'out'.
            Option: (positional argument).
        error_file: The error file. Required for file-based 'in'/'out' and
            for in-memory BCP. Option: (-e).
        format_file: The format file to use for 'in'/'out'. Cannot be
            combined with columns. Option: (-f).
        query: The query text. Required when direction is 'queryout'.
        bulk_mode: Bulk mode ('native', 'char', 'unicode'). Option: (-n, -c, -w).
            Defaults to "native".
        batch_size: The batch size. Must be non-negative. Option: (-b).
        max_errors: The maximum number of errors allowed. Must be
            non-negative. Option: (-m).
        first_row: The first row to process. Must be non-negative and not
            greater than last_row. Option: (-F).
        last_row: The last row to process. Must be non-negative. Option: (-L).
        code_page: The code page, as an int (non-negative) or a str.
            Option: (-C).
        hints: Additional hints. Option: (-h).
        columns: Column formats. Cannot be combined with format_file.
        bind_data: Data bindings for in-memory BCP. Supplying any bindings
            switches use_memory_bcp on automatically.
        row_terminator: Row terminator as bytes, or None.
        keep_identity: Keep identity values. Option: (-E).
        keep_nulls: Keep null values. Option: (-k).
        use_memory_bcp: Flag for in-memory BCP (bind and sendrow). Requires
            direction='in' and non-empty bind_data.

    Raises:
        ValueError: If any option fails the validation in __post_init__.
        TypeError: If row_terminator is neither bytes nor None.
    """

    direction: str
    data_file: Optional[str] = None  # data_file is mandatory for 'in' and 'out'
    error_file: Optional[str] = None
    format_file: Optional[str] = None
    # write_format_file is removed as 'format' direction is not actively supported
    query: Optional[str] = None  # For 'queryout' direction
    bulk_mode: Optional[str] = "native"  # Default to 'native' mode
    batch_size: Optional[int] = None
    max_errors: Optional[int] = None
    first_row: Optional[int] = None
    last_row: Optional[int] = None
    code_page: Optional[Union[int, str]] = None
    hints: Optional[str] = None
    columns: Optional[List[ColumnFormat]] = field(default_factory=list)
    bind_data: Union[List[BindData], List[List[BindData]]] = field(
        default_factory=list
    )  # Data bindings for in-memory BCP
    row_terminator: Optional[bytes] = None
    keep_identity: bool = False
    keep_nulls: bool = False
    use_memory_bcp: bool = False  # Flag for in-memory BCP (bind and sendrow)

    def __post_init__(self):
        """Validate the option set; the first failing check raises."""
        if not self.direction:
            raise ValueError("BCPOptions.direction is a required field.")

        # Supplying bindings implies in-memory BCP.
        if self.bind_data and not self.use_memory_bcp:
            self.use_memory_bcp = True

        if self.use_memory_bcp and not self.bind_data:
            raise ValueError(
                "BCPOptions.bind_data must be provided when use_memory_bcp is True."
            )

        if self.direction not in ALLOWED_DIRECTIONS:
            raise ValueError(
                f"BCPOptions.direction '{self.direction}' is invalid. "
                f"Allowed directions are: {', '.join(ALLOWED_DIRECTIONS)}."
            )

        # In-memory BCP only makes sense when sending rows to the server.
        if self.use_memory_bcp and self.direction != "in":
            raise ValueError("in-memory BCP operations require direction='in'")

        if self.use_memory_bcp:
            # bind_data was already verified above; data_file is not needed
            # for in-memory BCP, but error_file is still required.
            if not self.error_file:
                raise ValueError(
                    "error_file must be provided even for in-memory BCP operations."
                )
        elif self.direction in ("in", "out"):
            # Regular file-based BCP validation.
            if not self.data_file:
                raise ValueError(
                    f"BCPOptions.data_file is required for file-based BCP "
                    f"direction '{self.direction}'."
                )
            if not self.error_file:
                raise ValueError(
                    "error_file must be provided for file-based BCP operations."
                )

        if self.direction == "queryout" and not self.query:
            # The message names the real direction value ('queryout'), not
            # the non-existent 'query'.
            raise ValueError(
                "BCPOptions.query is required for BCP direction 'queryout'."
            )

        if self.columns and self.format_file:
            raise ValueError(
                "Cannot specify both 'columns' (for bcp_colfmt) and 'format_file' "
                "(for bcp_readfmt). Choose one."
            )

        if isinstance(self.code_page, int) and self.code_page < 0:
            raise ValueError(
                "BCPOptions.code_page, if an integer, must be non-negative."
            )

        if self.bulk_mode not in ALLOWED_FILE_MODES:
            raise ValueError(
                f"BCPOptions.bulk_mode '{self.bulk_mode}' is invalid. "
                f"Allowed modes are: {', '.join(ALLOWED_FILE_MODES)}."
            )

        for attr_name in ("batch_size", "max_errors", "first_row", "last_row"):
            attr_value = getattr(self, attr_name)
            if attr_value is not None and attr_value < 0:
                raise ValueError(
                    f"BCPOptions.{attr_name} must be non-negative if specified. "
                    f"Got {attr_value}"
                )

        if (
            self.first_row is not None
            and self.last_row is not None
            and self.first_row > self.last_row
        ):
            raise ValueError(
                "BCPOptions.first_row cannot be greater than BCPOptions.last_row."
            )

        if self.row_terminator is not None and not isinstance(
            self.row_terminator, bytes
        ):
            raise TypeError("row_terminator must be bytes or None.")
Loading