Skip to content

Commit 9d6fc2f

Browse files
jjerphanphoebusm
andauthored
Restrict custom CA certificate configurations for the Azure SDK to Linux only (#2282)
#### Reference Issues/PRs Extracted from #2252. See discussions in conda-forge/azure-core-cpp-feedstock#23. #### What does this implement or fix? Have the Azure SDK on Windows only use WinHTTP instead of `libcurl`. #### Any other comments? #### Checklist <details> <summary> Checklist for code changes... </summary> - [x] Have you updated the relevant docstrings, documentation and copyright notice? - [ ] Is this contribution tested against [all ArcticDB's features](../docs/mkdocs/docs/technical/contributing.md)? - [ ] Do all exceptions introduced raise appropriate [error messages](https://docs.arcticdb.io/error_messages/)? - [ ] Are API changes highlighted in the PR description? - [ ] Is the PR labelled as enhancement or bug so it appears in autogenerated release notes? </details> <!-- Thanks for contributing a Pull Request to ArcticDB! Please ensure you have taken a look at: - ArcticDB's Code of Conduct: https://github.com/man-group/ArcticDB/blob/master/CODE_OF_CONDUCT.md - ArcticDB's Contribution Licensing: https://github.com/man-group/ArcticDB/blob/master/docs/mkdocs/docs/technical/contributing.md#contribution-licensing --> --------- Signed-off-by: Julien Jerphanion <[email protected]> Co-authored-by: Phoebus Mak <[email protected]>
1 parent 7721cc2 commit 9d6fc2f

File tree

9 files changed

+389
-69
lines changed

9 files changed

+389
-69
lines changed

cpp/arcticdb/storage/azure/azure_client_impl.cpp

Lines changed: 88 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,52 @@
66
* will be governed by the Apache License, version 2.0.
77
*/
88

9+
/**
10+
* Azure Transport Selection:
11+
*
12+
* This file implements platform-specific HTTP transport selection for Azure storage operations:
13+
*
14+
* - On Windows (_WIN32):
15+
* - Always uses WinHTTP (native Windows HTTP client)
16+
* - CA certificate settings are not supported as WinHTTP uses Windows certificate store
17+
* - Custom CA certificates must be installed via Windows Certificate Manager (certmgr.msc)
18+
* - This provides better performance and integration with Windows security
19+
*
20+
* - On macOS (__APPLE__):
21+
* - Always uses libcurl as the HTTP transport
22+
* - CA certificate settings are not supported
23+
* - Custom CA certificates must be installed via Keychain Access
24+
*
25+
* - On Linux:
26+
* - Always uses libcurl as the HTTP transport
27+
* - Supports custom CA certificate configuration via ca_cert_path and ca_cert_dir
28+
*
29+
* The selection is done at compile time via preprocessor directives to ensure
30+
* optimal performance and minimal runtime overhead. The appropriate transport
31+
* header is included based on the platform, and the transport is configured
32+
* in get_client_options().
33+
*
34+
* Note, for conda-forge's builds, the distribution of `libcurl` uses CA certificates
35+
* provided by the `ca-certificates` package originated from the certifi python package.
36+
*
37+
* See:
38+
* https://github.com/conda-forge/ca-certificates-feedstock/blob/d13d63b3192ec707b514637930fd215d0776c604/recipe/meta.yaml#L8
39+
* https://github.com/conda-forge/curl-feedstock/blob/c6144ac9941ab00393a5a76954ddc19fab8005d1/recipe/build.sh#L19
40+
*/
41+
42+
#if defined(_WIN32)
43+
#include <azure/core/http/win_http_transport.hpp>
44+
#else
945
#include <azure/core/http/curl_transport.hpp>
46+
#endif
47+
48+
#include <azure/core.hpp>
49+
#include <azure/storage/blobs.hpp>
1050

1151
#include <arcticdb/storage/azure/azure_client_impl.hpp>
1252
#include <arcticdb/storage/azure/azure_client_interface.hpp>
1353
#include <arcticdb/storage/object_store_utils.hpp>
54+
#include <arcticdb/util/error_code.hpp>
1455

1556
namespace arcticdb::storage {
1657
using namespace object_store_utils;
@@ -31,18 +72,54 @@ RealAzureClient::RealAzureClient(const Config& conf) :
3172

3273
Azure::Storage::Blobs::BlobClientOptions RealAzureClient::get_client_options(const Config& conf) {
3374
BlobClientOptions client_options;
34-
if (!conf.ca_cert_path().empty() ||
35-
!conf.ca_cert_dir().empty()) { // WARNING: Setting ca_cert_path or ca_cert_dir will force Azure sdk uses libcurl
36-
// as backend support, instead of winhttp
37-
Azure::Core::Http::CurlTransportOptions curl_transport_options;
38-
if (!conf.ca_cert_path().empty()) {
39-
curl_transport_options.CAInfo = conf.ca_cert_path();
40-
}
41-
if (!conf.ca_cert_dir().empty()) {
42-
curl_transport_options.CAPath = conf.ca_cert_dir();
43-
}
44-
client_options.Transport.Transport = std::make_shared<Azure::Core::Http::CurlTransport>(curl_transport_options);
75+
76+
#if defined(_WIN32)
77+
// On Windows, always use WinHTTP
78+
ARCTICDB_RUNTIME_DEBUG(log::storage(), "Using WinHTTP transport");
79+
if (conf.ca_cert_path().empty() && conf.ca_cert_dir().empty()) {
80+
client_options.Transport.Transport = std::make_shared<Azure::Core::Http::WinHttpTransport>();
81+
} else {
82+
throw ArcticSpecificException<ErrorCode::E_INVALID_USER_ARGUMENT>(
83+
"CA certificate settings are not supported on Windows. "
84+
"Please use Windows Certificate Manager (certmgr.msc) to manage certificates:\n"
85+
"1. Open Certificate Manager (certmgr.msc)\n"
86+
"2. Navigate to 'Trusted Root Certification Authorities'\n"
87+
"3. Right-click and select 'All Tasks > Import'\n"
88+
"4. Select your certificate file and follow the import wizard\n"
89+
"5. Ensure 'Place all certificates in the following store' is selected\n"
90+
"6. Complete the import by clicking 'Next' and 'Finish'"
91+
);
92+
}
93+
#elif defined(__APPLE__)
94+
// On macOS, always use libcurl but ignore CA cert paths
95+
ARCTICDB_RUNTIME_DEBUG(log::storage(), "Using libcurl transport");
96+
if (conf.ca_cert_path().empty() && conf.ca_cert_dir().empty()) {
97+
client_options.Transport.Transport = std::make_shared<Azure::Core::Http::CurlTransport>();
98+
} else {
99+
throw ArcticSpecificException<ErrorCode::E_INVALID_USER_ARGUMENT>(
100+
"CA certificate settings are not supported on macOS. "
101+
"Please use Keychain Access to manage certificates:\n"
102+
"1. Open Keychain Access (Applications > Utilities > Keychain Access)\n"
103+
"2. Select 'System' keychain from the left sidebar\n"
104+
"3. Click File > Import Items\n"
105+
"4. Select your certificate file\n"
106+
"5. Enter your keychain password if prompted\n"
107+
"6. The certificate will be added to the System keychain"
108+
);
45109
}
110+
#else
111+
// On Linux, use libcurl with CA cert configuration
112+
ARCTICDB_RUNTIME_DEBUG(log::storage(), "Using libcurl transport");
113+
Azure::Core::Http::CurlTransportOptions curl_transport_options;
114+
if (!conf.ca_cert_path().empty()) {
115+
curl_transport_options.CAInfo = conf.ca_cert_path();
116+
}
117+
if (!conf.ca_cert_dir().empty()) {
118+
curl_transport_options.CAPath = conf.ca_cert_dir();
119+
}
120+
client_options.Transport.Transport = std::make_shared<Azure::Core::Http::CurlTransport>(curl_transport_options);
121+
#endif
122+
46123
return client_options;
47124
}
48125

cpp/arcticdb/storage/azure/azure_client_impl.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@ class RealAzureClient : public AzureClientWrapper {
1515
private:
1616
Azure::Storage::Blobs::BlobContainerClient container_client;
1717

18-
static Azure::Storage::Blobs::BlobClientOptions get_client_options(const Config& conf);
19-
2018
public:
2119
explicit RealAzureClient(const Config& conf);
2220

21+
static Azure::Storage::Blobs::BlobClientOptions get_client_options(const Config& conf);
22+
2323
void write_blob(
2424
const std::string& blob_name, Segment& segment,
2525
const Azure::Storage::Blobs::UploadBlockBlobFromOptions& upload_option, unsigned int request_timeout
@@ -36,4 +36,4 @@ class RealAzureClient : public AzureClientWrapper {
3636

3737
Azure::Storage::Blobs::ListBlobsPagedResponse list_blobs(const std::string& prefix) override;
3838
};
39-
} // namespace arcticdb::storage::azure
39+
} // namespace arcticdb::storage::azure
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/* Copyright 2023 Man Group Operations Limited
2+
*
3+
* Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt.
4+
*
5+
* As of the Change Date specified in that file, in accordance with the Business Source License, use of this software
6+
* will be governed by the Apache License, version 2.0.
7+
*/
8+
9+
#include <gtest/gtest.h>
10+
#include <arcticdb/util/test/gtest_utils.hpp>
11+
12+
#include <arcticdb/storage/azure/azure_client_impl.hpp>
13+
#include <arcticdb/storage/azure/azure_client_interface.hpp>
14+
#include <arcticdb/util/error_code.hpp>
15+
16+
using namespace arcticdb;
17+
using namespace storage;
18+
using namespace azure;
19+
20+
class AzureTransportTest : public testing::Test {
21+
protected:
22+
arcticdb::proto::azure_storage::Config config;
23+
24+
void SetUp() override {
25+
config.set_endpoint("https://testaccount.blob.core.windows.net");
26+
config.set_container_name("testcontainer");
27+
}
28+
};
29+
30+
TEST_F(AzureTransportTest, TestWindowsTransportSelection) {
31+
#if defined(_WIN32)
32+
// Test that WinHTTP is used when no CA cert settings are provided
33+
auto options = RealAzureClient::get_client_options(config);
34+
ASSERT_NE(options.Transport.Transport, nullptr);
35+
36+
// Test that an exception is thrown when CA cert settings are provided
37+
config.set_ca_cert_path("/path/to/cert.pem");
38+
ASSERT_THROW(RealAzureClient::get_client_options(config), ArcticSpecificException<ErrorCode::E_INVALID_USER_ARGUMENT>);
39+
40+
config.set_ca_cert_path("");
41+
config.set_ca_cert_dir("/path/to/certs");
42+
ASSERT_THROW(RealAzureClient::get_client_options(config), ArcticSpecificException<ErrorCode::E_INVALID_USER_ARGUMENT>);
43+
#endif
44+
}
45+
46+
TEST_F(AzureTransportTest, TestMacOSTransportSelection) {
47+
#if defined(__APPLE__)
48+
// Test that libcurl is used when no CA cert settings are provided
49+
auto options = RealAzureClient::get_client_options(config);
50+
ASSERT_NE(options.Transport.Transport, nullptr);
51+
52+
// Test that an exception is thrown when CA cert settings are provided
53+
config.set_ca_cert_path("/path/to/cert.pem");
54+
ASSERT_THROW(RealAzureClient::get_client_options(config), ArcticSpecificException<ErrorCode::E_INVALID_USER_ARGUMENT>);
55+
56+
config.set_ca_cert_path("");
57+
config.set_ca_cert_dir("/path/to/certs");
58+
ASSERT_THROW(RealAzureClient::get_client_options(config), ArcticSpecificException<ErrorCode::E_INVALID_USER_ARGUMENT>);
59+
#endif
60+
}
61+
62+
TEST_F(AzureTransportTest, TestLinuxTransportSelection) {
63+
#if !defined(_WIN32) && !defined(__APPLE__)
64+
// Test that libcurl is used with default settings
65+
auto options = RealAzureClient::get_client_options(config);
66+
ASSERT_NE(options.Transport.Transport, nullptr);
67+
68+
// Test that libcurl is used with CA cert path
69+
config.set_ca_cert_path("/path/to/cert.pem");
70+
options = RealAzureClient::get_client_options(config);
71+
ASSERT_NE(options.Transport.Transport, nullptr);
72+
73+
// Test that libcurl is used with CA cert directory
74+
config.set_ca_cert_path("");
75+
config.set_ca_cert_dir("/path/to/certs");
76+
options = RealAzureClient::get_client_options(config);
77+
ASSERT_NE(options.Transport.Transport, nullptr);
78+
79+
// Test that libcurl is used with both CA cert path and directory
80+
config.set_ca_cert_path("/path/to/cert.pem");
81+
options = RealAzureClient::get_client_options(config);
82+
ASSERT_NE(options.Transport.Transport, nullptr);
83+
#endif
84+
}

cpp/vcpkg.json

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,18 @@
6868
"name": "arrow",
6969
"default-features": false
7070
},
71-
"azure-core-cpp",
71+
{
72+
"name": "azure-core-cpp",
73+
"default-features": false,
74+
"features": [ "winhttp" ],
75+
"platform": "windows"
76+
},
77+
{
78+
"name": "azure-core-cpp",
79+
"default-features": false,
80+
"features": [ "curl" ],
81+
"platform": "!windows"
82+
},
7283
"azure-identity-cpp",
7384
"azure-storage-blobs-cpp",
7485
"benchmark",

python/arcticdb/adapters/azure_library_adapter.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,22 @@
66
As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0.
77
"""
88

9+
import platform
910
import re
11+
import ssl
1012
import time
13+
from collections import namedtuple
14+
from dataclasses import dataclass, fields
1115
from typing import Optional
12-
import ssl
13-
import platform
1416

1517
from arcticc.pb2.storage_pb2 import EnvironmentConfigsMap, LibraryConfig
16-
from arcticdb.version_store.helper import add_azure_library_to_env
1718
from arcticdb.config import _DEFAULT_ENV
19+
from arcticdb.encoding_version import EncodingVersion
20+
from arcticdb.exceptions import ArcticException
1821
from arcticdb.version_store._store import NativeVersionStore
22+
from arcticdb.version_store.helper import add_azure_library_to_env
1923
from arcticdb.adapters.arctic_library_adapter import ArcticLibraryAdapter, set_library_options
2024
from arcticdb_ext.storage import StorageOverride, AzureOverride, CONFIG_LIBRARY_NAME
21-
from arcticdb.encoding_version import EncodingVersion
22-
from collections import namedtuple
23-
from dataclasses import dataclass, fields
2425

2526
PARSED_QUERY = namedtuple("PARSED_QUERY", ["region"])
2627

@@ -59,13 +60,18 @@ def __init__(self, uri: str, encoding_version: EncodingVersion, *args, **kwargs)
5960
]
6061
)
6162
self._container = self._query_params.Container
62-
if platform.system() != "Linux" and (self._query_params.CA_cert_path or self._query_params.CA_cert_dir):
63+
64+
# Extract CA certificate settings from parsed query parameters
65+
# Ensure values are always strings (not None)
66+
self._ca_cert_path = str(self._query_params.CA_cert_path) if self._query_params.CA_cert_path else ""
67+
self._ca_cert_dir = str(self._query_params.CA_cert_dir) if self._query_params.CA_cert_dir else ""
68+
69+
if platform.system() != "Linux" and (self._ca_cert_path or self._ca_cert_dir):
6370
raise ValueError(
6471
"You have provided `ca_cert_path` or `ca_cert_dir` in the URI which is only supported on Linux. "
6572
"Remove the setting in the connection URI and use your operating system defaults."
6673
)
67-
self._ca_cert_path = self._query_params.CA_cert_path
68-
self._ca_cert_dir = self._query_params.CA_cert_dir
74+
6975
if not self._ca_cert_path and not self._ca_cert_dir and platform.system() == "Linux":
7076
if ssl.get_default_verify_paths().cafile is not None:
7177
self._ca_cert_path = ssl.get_default_verify_paths().cafile

python/arcticdb/version_store/helper.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -430,8 +430,10 @@ def get_azure_proto(
430430
azure.prefix = f"{lib_name}{time.time() * 1e9:.0f}"
431431
else:
432432
azure.prefix = lib_name
433-
azure.ca_cert_path = ca_cert_path if ca_cert_path else ""
434-
azure.ca_cert_dir = ca_cert_dir if ca_cert_dir else ""
433+
# Always set the values explicitly to ensure they're stored in the config
434+
# Convert None to empty string, but preserve non-empty strings
435+
azure.ca_cert_path = str(ca_cert_path) if ca_cert_path is not None else ""
436+
azure.ca_cert_dir = str(ca_cert_dir) if ca_cert_dir is not None else ""
435437

436438
sid, storage = get_storage_for_lib_name(azure.prefix, env)
437439
storage.config.Pack(azure, type_url_prefix="cxx.arctic.org")

python/tests/integration/arcticdb/test_arctic.py

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -274,25 +274,6 @@ def test_azurite_no_ssl_verification(monkeypatch, azurite_storage, client_cert_f
274274
ac.delete_library(lib_name)
275275

276276

277-
@AZURE_TESTS_MARK
278-
@SSL_TESTS_MARK
279-
@pytest.mark.parametrize("client_cert_file", parameter_display_status)
280-
@pytest.mark.parametrize("client_cert_dir", parameter_display_status)
281-
def test_azurite_ssl_verification(azurite_ssl_storage, monkeypatch, client_cert_file, client_cert_dir, lib_name):
282-
storage = azurite_ssl_storage
283-
# Leaving ca file and ca dir unset will fallback to using os default setting,
284-
# which is different from the test environment
285-
default_setting = DefaultSetting(storage.factory)
286-
monkeypatch.setattr("ssl.get_default_verify_paths", lambda: default_setting)
287-
uri = edit_connection_string(storage.arctic_uri, ";", storage, None, client_cert_file, client_cert_dir)
288-
ac = Arctic(uri)
289-
try:
290-
lib = ac.create_library(lib_name)
291-
lib.write("sym", pd.DataFrame())
292-
finally:
293-
ac.delete_library(lib_name)
294-
295-
296277
def test_basic_metadata(lmdb_version_store):
297278
lib = lmdb_version_store
298279
df = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})

0 commit comments

Comments
 (0)