Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 125 additions & 43 deletions mssql_python/bcp_options.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
"""
Copyright (c) Microsoft Corporation.
Licensed under the MIT license.
BCPOptions and ColumnFormat classes for Bulk Copy Program (BCP) operations.
"""
import logging
from dataclasses import dataclass, field
from typing import List, Optional, Literal
from typing import List, Optional, Union

# Removed unused import: BCPControlOptions

# Allowed values for BCPOptions.direction and BCPOptions.bulk_mode; both are
# checked in BCPOptions.__post_init__, which raises ValueError on a mismatch.
ALLOWED_DIRECTIONS = ("in", "out", "queryout")
ALLOWED_FILE_MODES = ("native", "char", "unicode")


@dataclass
Expand All @@ -23,31 +35,33 @@ class ColumnFormat:
Must be a positive integer.
"""

prefix_len: int
data_len: int
file_col: int = 1
user_data_type: int = 0
prefix_len: int = 0
data_len: int = 0
field_terminator: Optional[bytes] = None
row_terminator: Optional[bytes] = None
terminator_len: int = 0
server_col: int = 1
file_col: int = 1

def __post_init__(self):
    """Validate the column-format fields once the dataclass is initialised.

    Checks run in a fixed order and stop at the first failure. Each failure
    is logged at ERROR level and then raised with the same message.

    Raises:
        ValueError: if ``prefix_len`` or ``data_len`` is negative, or if
            ``server_col`` or ``file_col`` is not a positive (1-based) index.
        TypeError: if ``field_terminator`` or ``row_terminator`` is neither
            ``bytes`` nor ``None``.
    """
    logging.debug("Initializing ColumnFormat: %r", self)

    # Lengths may be zero but never negative.
    for name in ("prefix_len", "data_len"):
        if getattr(self, name) < 0:
            logging.error("%s must be a non-negative integer.", name)
            raise ValueError(f"{name} must be a non-negative integer.")

    # Column ordinals are 1-based, so zero is rejected as well.
    for name in ("server_col", "file_col"):
        if getattr(self, name) <= 0:
            logging.error("%s must be a positive integer (1-based).", name)
            raise ValueError(f"{name} must be a positive integer (1-based).")

    # Terminators are optional, but when given they must be raw bytes.
    # The row_terminator failure is now logged like every other branch.
    for name in ("field_terminator", "row_terminator"):
        value = getattr(self, name)
        if value is not None and not isinstance(value, bytes):
            logging.error("%s must be bytes or None.", name)
            raise TypeError(f"{name} must be bytes or None.")


@dataclass
class BCPOptions:
Expand All @@ -71,51 +85,119 @@ class BCPOptions:
columns (List[ColumnFormat]): Column formats.
"""

direction: Literal["in", "out"]
data_file: str # data_file is mandatory for 'in' and 'out'
direction: str
data_file: Optional[str] = None # data_file is mandatory for 'in' and 'out'
error_file: Optional[str] = None
format_file: Optional[str] = None
# write_format_file is removed as 'format' direction is not actively supported
query: Optional[str] = None # For 'query' direction
bulk_mode: Optional[str] = "native" # Default to 'native' mode
batch_size: Optional[int] = None
max_errors: Optional[int] = None
first_row: Optional[int] = None
last_row: Optional[int] = None
code_page: Optional[str] = None
code_page: Optional[Union[int, str]] = None
hints: Optional[str] = None
columns: Optional[List[ColumnFormat]] = field(default_factory=list)
row_terminator: Optional[bytes] = None
keep_identity: bool = False
keep_nulls: bool = False
hints: Optional[str] = None
bulk_mode: Literal["native", "char", "unicode"] = "native"
columns: List[ColumnFormat] = field(default_factory=list)

def __post_init__(self):
if self.direction not in ["in", "out"]:
raise ValueError("direction must be 'in' or 'out'.")
logging.debug("Initializing BCPOptions: %r", self)
if not self.direction:
logging.error("BCPOptions.direction is a required field.")
raise ValueError("BCPOptions.direction is a required field.")

if self.direction not in ALLOWED_DIRECTIONS:
logging.error(
"BCPOptions.direction '%s' is invalid. Allowed directions are: %s.",
self.direction, ", ".join(ALLOWED_DIRECTIONS)
)
raise ValueError(
f"BCPOptions.direction '{self.direction}' is invalid. "
f"Allowed directions are: {', '.join(ALLOWED_DIRECTIONS)}."
)

if self.direction in ["in", "out"]:
if not self.data_file:
logging.error(
"BCPOptions.data_file is required for BCP direction '%s'.",
self.direction
)
raise ValueError(
f"BCPOptions.data_file is required for BCP direction '{self.direction}'."
)
if self.direction == "queryout" and not self.query:
logging.error("BCPOptions.query is required for BCP direction 'query'.")
raise ValueError("BCPOptions.query is required for BCP direction 'query'.")

if not self.data_file:
raise ValueError("data_file must be provided and non-empty for 'in' or 'out' directions.")
if self.error_file is None or not self.error_file: # Making error_file mandatory for in/out
raise ValueError("error_file must be provided and non-empty for 'in' or 'out' directions.")

if self.format_file is not None and not self.format_file:
raise ValueError("format_file, if provided, must not be an empty string.")
if self.batch_size is not None and self.batch_size <= 0:
raise ValueError("batch_size must be a positive integer.")
if self.max_errors is not None and self.max_errors < 0:
raise ValueError("max_errors must be a non-negative integer.")
if self.first_row is not None and self.first_row <= 0:
raise ValueError("first_row must be a positive integer.")
if self.last_row is not None and self.last_row <= 0:
raise ValueError("last_row must be a positive integer.")
if self.last_row is not None and self.first_row is None:
raise ValueError("first_row must be specified if last_row is specified.")
logging.error(
"data_file must be provided and non-empty for 'in' or 'out' directions."
)
raise ValueError(
"data_file must be provided and non-empty for 'in' or 'out' directions."
)
if self.error_file is None or not self.error_file:
logging.error(
"error_file must be provided and non-empty for 'in' or 'out' directions."
)
raise ValueError(
"error_file must be provided and non-empty for 'in' or 'out' directions."
)

if self.columns and self.format_file:
logging.error(
"Cannot specify both 'columns' (for bcp_colfmt) and 'format_file' "
"(for bcp_readfmt). Choose one."
)
raise ValueError(
"Cannot specify both 'columns' (for bcp_colfmt) and 'format_file' "
"(for bcp_readfmt). Choose one."
)

if isinstance(self.code_page, int) and self.code_page < 0:
logging.error(
"BCPOptions.code_page, if an integer, must be non-negative."
)
raise ValueError(
"BCPOptions.code_page, if an integer, must be non-negative."
)

if self.bulk_mode not in ALLOWED_FILE_MODES:
logging.error(
"BCPOptions.bulk_mode '%s' is invalid. Allowed modes are: %s.",
self.bulk_mode, ", ".join(ALLOWED_FILE_MODES)
)
raise ValueError(
f"BCPOptions.bulk_mode '{self.bulk_mode}' is invalid. "
f"Allowed modes are: {', '.join(ALLOWED_FILE_MODES)}."
)
for attr_name in ["batch_size", "max_errors", "first_row", "last_row"]:
attr_value = getattr(self, attr_name)
if attr_value is not None and attr_value < 0:
logging.error(
"BCPOptions.%s must be non-negative if specified. Got %r",
attr_name, attr_value
)
raise ValueError(
f"BCPOptions.{attr_name} must be non-negative if specified. Got {attr_value!r}"
)

if (
self.first_row is not None
and self.last_row is not None
and self.last_row < self.first_row
and self.first_row > self.last_row
):
logging.error(
"BCPOptions.first_row cannot be greater than BCPOptions.last_row."
)
raise ValueError(
"BCPOptions.first_row cannot be greater than BCPOptions.last_row."
)

if self.row_terminator is not None and not isinstance(
self.row_terminator, bytes
):
raise ValueError("last_row must be greater than or equal to first_row.")
if self.code_page is not None and not self.code_page:
raise ValueError("code_page, if provided, must not be an empty string.")
if self.hints is not None and not self.hints:
raise ValueError("hints, if provided, must not be an empty string.")
if self.bulk_mode not in ["native", "char", "unicode"]:
raise ValueError("bulk_mode must be 'native', 'char', or 'unicode'.")
logging.error("row_terminator must be bytes or None.")
raise TypeError("row_terminator must be bytes or None.")
15 changes: 15 additions & 0 deletions mssql_python/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,18 @@ class ConstantsDDBC(Enum):
SQL_C_WCHAR = -8
SQL_NULLABLE = 1
SQL_MAX_NUMERIC_LEN = 16

class BCPControlOptions(Enum):
    """Identifiers for the control options of a BCP operation.

    Each member's value is a string: the representation the BCP API
    expects for that option.
    """

    BATCH_SIZE = "BCPBATCH"
    MAX_ERRORS = "BCPMAXERRS"
    FIRST_ROW = "BCPFIRST"
    LAST_ROW = "BCPLAST"
    FILE_CODE_PAGE = "BCPFILECP"
    KEEP_IDENTITY = "BCPKEEPIDENTITY"
    KEEP_NULLS = "BCPKEEPNULLS"
    HINTS = "BCPHINTS"
    SET_ROW_TERMINATOR = "BCPSETROWTERM"
Loading