Skip to content

Commit

Permalink
'gdal vector select': add a --exclude switch
Browse files Browse the repository at this point in the history
  • Loading branch information
rouault committed Feb 11, 2025
1 parent c4a6fa7 commit 32d4ef0
Show file tree
Hide file tree
Showing 5 changed files with 192 additions and 27 deletions.
103 changes: 82 additions & 21 deletions apps/gdalalg_vector_select.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,15 @@ GDALVectorSelectAlgorithm::GDALVectorSelectAlgorithm(bool standaloneStep)
: GDALVectorPipelineStepAlgorithm(NAME, DESCRIPTION, HELP_URL,
standaloneStep)
{
AddArg("fields", 0, _("Selected fields"), &m_selectedFields)
AddArg("fields", 0, _("Fields to select (or exclude if --exclude)"),
&m_fields)
.SetPositional()
.SetRequired();
AddArg("exclude", 0, _("Exclude specified fields"), &m_exclude)
.SetMutualExclusionGroup("exclude-ignore");
AddArg("ignore-missing-fields", 0, _("Ignore missing fields"),
&m_ignoreMissingFields);
&m_ignoreMissingFields)
.SetMutualExclusionGroup("exclude-ignore");
}

namespace
Expand All @@ -51,7 +55,6 @@ class GDALVectorSelectAlgorithmLayer final
public OGRGetNextFeatureThroughRaw<GDALVectorSelectAlgorithmLayer>
{
private:
bool m_bIsOK = true;
OGRLayer &m_oSrcLayer;
OGRFeatureDefn *const m_poFeatureDefn = nullptr;
std::vector<int> m_anMapSrcFieldsToDstFields{};
Expand Down Expand Up @@ -81,16 +84,24 @@ class GDALVectorSelectAlgorithmLayer final
DEFINE_GET_NEXT_FEATURE_THROUGH_RAW(GDALVectorSelectAlgorithmLayer)

public:
GDALVectorSelectAlgorithmLayer(
OGRLayer &oSrcLayer, const std::vector<std::string> &selectedFields,
bool bStrict)
explicit GDALVectorSelectAlgorithmLayer(OGRLayer &oSrcLayer)
: m_oSrcLayer(oSrcLayer),
m_poFeatureDefn(new OGRFeatureDefn(oSrcLayer.GetName()))
{
SetDescription(oSrcLayer.GetDescription());
m_poFeatureDefn->SetGeomType(wkbNone);
m_poFeatureDefn->Reference();
}

~GDALVectorSelectAlgorithmLayer() override
{
if (m_poFeatureDefn)
m_poFeatureDefn->Dereference();
}

bool IncludeFields(const std::vector<std::string> &selectedFields,
bool bStrict)
{
std::set<std::string> oSetSelFields;
std::set<std::string> oSetSelFieldsUC;
for (const std::string &osFieldName : selectedFields)
Expand All @@ -101,7 +112,7 @@ class GDALVectorSelectAlgorithmLayer final

std::set<std::string> oSetUsedSetFieldsUC;

const auto poSrcLayerDefn = oSrcLayer.GetLayerDefn();
const auto poSrcLayerDefn = m_oSrcLayer.GetLayerDefn();
for (int i = 0; i < poSrcLayerDefn->GetFieldCount(); ++i)
{
const auto poSrcFieldDefn = poSrcLayerDefn->GetFieldDefn(i);
Expand Down Expand Up @@ -157,26 +168,70 @@ class GDALVectorSelectAlgorithmLayer final
{
CPLError(bStrict ? CE_Failure : CE_Warning, CPLE_AppDefined,
"Field '%s' does not exist in layer '%s'.%s",
osName.c_str(), oSrcLayer.GetDescription(),
osName.c_str(), m_oSrcLayer.GetDescription(),
bStrict ? " You may specify "
"--ignore-missing-fields to skip it"
: " It will be ignored");
if (bStrict)
m_bIsOK = false;
return false;
}
}
}
}

~GDALVectorSelectAlgorithmLayer() override
{
if (m_poFeatureDefn)
m_poFeatureDefn->Dereference();
return true;
}

bool IsOK() const
/** Build the output layer definition from every source field except
 * the ones listed in \a fields.
 *
 * Names are matched case-insensitively (both sides are upper-cased
 * before comparison). Names in \a fields that match no source field
 * are simply ignored: this method has no failure path.
 *
 * For each source attribute field, one entry is appended to
 * m_anMapSrcFieldsToDstFields: -1 when the field is excluded, otherwise
 * the index the field receives in the output definition.
 *
 * For each retained geometry field, its source index is appended to
 * m_anMapDstGeomFieldsToSrcGeomFields.
 *
 * @param fields Names of the attribute and/or geometry fields to drop.
 */
void ExcludeFields(const std::vector<std::string> &fields)
{
    // Upper-cased copies of the requested names, used for
    // case-insensitive lookups below.
    std::set<std::string> oSetSelFieldsUC;
    for (const std::string &osFieldName : fields)
    {
        oSetSelFieldsUC.insert(CPLString(osFieldName).toupper());
    }

    const auto poSrcLayerDefn = m_oSrcLayer.GetLayerDefn();
    for (int i = 0; i < poSrcLayerDefn->GetFieldCount(); ++i)
    {
        const auto poSrcFieldDefn = poSrcLayerDefn->GetFieldDefn(i);
        auto oIter = oSetSelFieldsUC.find(
            CPLString(poSrcFieldDefn->GetNameRef()).toupper());
        if (oIter != oSetSelFieldsUC.end())
        {
            // Excluded field: no counterpart in the output definition.
            m_anMapSrcFieldsToDstFields.push_back(-1);
        }
        else
        {
            // The destination index is the field count before appending.
            m_anMapSrcFieldsToDstFields.push_back(
                m_poFeatureDefn->GetFieldCount());
            OGRFieldDefn oDstFieldDefn(*poSrcFieldDefn);
            m_poFeatureDefn->AddFieldDefn(&oDstFieldDefn);
        }
    }

    if (oSetSelFieldsUC.find(
            CPLString(OGR_GEOMETRY_DEFAULT_NON_EMPTY_NAME).toupper()) !=
            oSetSelFieldsUC.end() &&
        poSrcLayerDefn->GetGeomFieldCount() == 1)
    {
        // exclude default geometry field: the default geometry name was
        // requested and the source has a single geometry field, so no
        // geometry field is added to the output definition.
    }
    else
    {
        for (int i = 0; i < poSrcLayerDefn->GetGeomFieldCount(); ++i)
        {
            const auto poSrcFieldDefn = poSrcLayerDefn->GetGeomFieldDefn(i);
            auto oIter = oSetSelFieldsUC.find(
                CPLString(poSrcFieldDefn->GetNameRef()).toupper());
            if (oIter == oSetSelFieldsUC.end())
            {
                // Geometry field not listed: keep it.
                m_anMapDstGeomFieldsToSrcGeomFields.push_back(i);
                OGRGeomFieldDefn oDstFieldDefn(*poSrcFieldDefn);
                m_poFeatureDefn->AddGeomFieldDefn(&oDstFieldDefn);
            }
        }
    }
}

OGRFeatureDefn *GetLayerDefn() override
Expand Down Expand Up @@ -258,11 +313,17 @@ bool GDALVectorSelectAlgorithm::RunStep(GDALProgressFunc, void *)

for (auto &&poSrcLayer : poSrcDS->GetLayers())
{
auto poLayer = std::make_unique<GDALVectorSelectAlgorithmLayer>(
*poSrcLayer, m_selectedFields,
/* bStrict = */ !m_ignoreMissingFields);
if (!poLayer->IsOK())
return false;
auto poLayer =
std::make_unique<GDALVectorSelectAlgorithmLayer>(*poSrcLayer);
if (m_exclude)
{
poLayer->ExcludeFields(m_fields);
}
else
{
if (!poLayer->IncludeFields(m_fields, !m_ignoreMissingFields))
return false;
}
outDS->AddLayer(std::move(poLayer));
}

Expand Down
3 changes: 2 additions & 1 deletion apps/gdalalg_vector_select.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ class GDALVectorSelectAlgorithm /* non final */
private:
bool RunStep(GDALProgressFunc pfnProgress, void *pProgressData) override;

std::vector<std::string> m_selectedFields{};
std::vector<std::string> m_fields{};
bool m_ignoreMissingFields = false;
bool m_exclude = false;
};

/************************************************************************/
Expand Down
85 changes: 85 additions & 0 deletions autotest/utilities/test_gdalalg_vector_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,88 @@ def test_gdalalg_vector_select_fields_non_existing_ignore_missing_fields(tmp_vsi
lyr = ds.GetLayer(0)
assert lyr.GetLayerDefn().GetFieldCount() == 1
assert lyr.GetLayerDefn().GetGeomFieldCount() == 1


def test_gdalalg_vector_select_fields_exclude(tmp_vsimem):
    """Run the select algorithm with --exclude on an attribute field.

    The field list also contains a name that does not exist in the source;
    the run is still expected to succeed. The output must keep the other
    attribute fields and the geometry field.
    """

    out_filename = str(tmp_vsimem / "out.shp")

    argv = [
        "--exclude",
        "--fields=EAS_ID,i_do_not_exist",
        "../ogr/data/poly.shp",
        out_filename,
    ]
    select_alg = get_select_alg()
    assert select_alg.ParseRunAndFinalize(argv)

    with gdal.OpenEx(out_filename) as ds:
        lyr = ds.GetLayer(0)
        lyr_defn = lyr.GetLayerDefn()
        field_names = [
            lyr_defn.GetFieldDefn(idx).GetName()
            for idx in range(lyr_defn.GetFieldCount())
        ]
        assert field_names == ["AREA", "PRFEDEA"]
        assert lyr_defn.GetGeomFieldCount() == 1


def test_gdalalg_vector_select_fields_exclude_ogr_geometry(tmp_vsimem):
    """Run the select algorithm with --exclude on the "_ogr_geometry_" name.

    The output must keep all three attribute fields and have no geometry
    field.
    """

    out_filename = str(tmp_vsimem / "out.dbf")

    argv = [
        "--exclude",
        "--fields=_ogr_geometry_",
        "../ogr/data/poly.shp",
        out_filename,
    ]
    select_alg = get_select_alg()
    assert select_alg.ParseRunAndFinalize(argv)

    with gdal.OpenEx(out_filename) as ds:
        lyr = ds.GetLayer(0)
        lyr_defn = lyr.GetLayerDefn()
        field_names = [
            lyr_defn.GetFieldDefn(idx).GetName()
            for idx in range(lyr_defn.GetFieldCount())
        ]
        assert field_names == ["AREA", "EAS_ID", "PRFEDEA"]
        assert lyr_defn.GetGeomFieldCount() == 0


@pytest.mark.require_driver("GPKG")
def test_gdalalg_vector_select_fields_exclude_name_geom_fields(tmp_vsimem):
    """Run the select algorithm with --exclude on the name "geom".

    The source is a GeoPackage copy of poly.shp. The output must keep all
    three attribute fields and have no geometry field.
    """

    tmp_filename = str(tmp_vsimem / "tmp.gpkg")
    out_filename = str(tmp_vsimem / "out.dbf")

    # Convert the shapefile to GeoPackage to use as the algorithm input.
    gdal.VectorTranslate(tmp_filename, "../ogr/data/poly.shp")

    argv = ["--exclude", "--fields=geom", tmp_filename, out_filename]
    select_alg = get_select_alg()
    assert select_alg.ParseRunAndFinalize(argv)

    with gdal.OpenEx(out_filename) as ds:
        lyr = ds.GetLayer(0)
        lyr_defn = lyr.GetLayerDefn()
        field_names = [
            lyr_defn.GetFieldDefn(idx).GetName()
            for idx in range(lyr_defn.GetFieldCount())
        ]
        assert field_names == ["AREA", "EAS_ID", "PRFEDEA"]
        assert lyr_defn.GetGeomFieldCount() == 0


@pytest.mark.require_driver("GPKG")
def test_gdalalg_vector_select_fields_exclude_name_geom_fields_not_excluded(tmp_vsimem):
    """Run the select algorithm with --exclude on a name absent from the source.

    The source is a GeoPackage copy of poly.shp. The run is expected to
    succeed, and the output must keep all three attribute fields and the
    geometry field.
    """

    tmp_filename = str(tmp_vsimem / "tmp.gpkg")
    out_filename = str(tmp_vsimem / "out.dbf")

    # Convert the shapefile to GeoPackage to use as the algorithm input.
    gdal.VectorTranslate(tmp_filename, "../ogr/data/poly.shp")

    argv = ["--exclude", "--fields=i_do_not_exist", tmp_filename, out_filename]
    select_alg = get_select_alg()
    assert select_alg.ParseRunAndFinalize(argv)

    with gdal.OpenEx(out_filename) as ds:
        lyr = ds.GetLayer(0)
        lyr_defn = lyr.GetLayerDefn()
        field_names = [
            lyr_defn.GetFieldDefn(idx).GetName()
            for idx in range(lyr_defn.GetFieldCount())
        ]
        assert field_names == ["AREA", "EAS_ID", "PRFEDEA"]
        assert lyr_defn.GetGeomFieldCount() == 1
6 changes: 4 additions & 2 deletions doc/source/programs/gdal_vector_pipeline.rst
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,13 @@ Details for options can be found in :ref:`gdal_vector_filter_subcommand`.
Select a subset of fields from a vector dataset.
Positional arguments:
--fields <FIELDS> Selected fields [may be repeated] [required]
--fields <FIELDS> Fields to select (or exclude if --exclude) [may be repeated] [required]
Options:
--exclude Exclude specified fields
Mutually exclusive with --ignore-missing-fields
--ignore-missing-fields Ignore missing fields
Mutually exclusive with --exclude
Details for options can be found in :ref:`gdal_vector_select_subcommand`.
Expand Down
22 changes: 19 additions & 3 deletions doc/source/programs/gdal_vector_select.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Synopsis
Positional arguments:
-i, --input <INPUT> Input vector dataset [required]
-o, --output <OUTPUT> Output vector dataset [required]
--fields <FIELDS> Selected fields [may be repeated] [required]
--fields <FIELDS> Fields to select (or exclude if --exclude) [may be repeated] [required]
Common Options:
-h, --help Display help message and exit
Expand All @@ -42,8 +42,10 @@ Synopsis
--overwrite-layer Whether overwriting existing layer is allowed
--append Whether appending to existing layer is allowed
--output-layer <OUTPUT-LAYER> Output layer name
--fields <FIELDS> Selected fields [may be repeated]
--exclude Exclude specified fields
Mutually exclusive with --ignore-missing-fields
--ignore-missing-fields Ignore missing fields
Mutually exclusive with --exclude
Advanced Options:
--if, --input-format <INPUT-FORMAT> Input formats [may be repeated]
Expand All @@ -68,7 +70,8 @@ Standard options

.. option:: --fields <FIELDS>

Comma-separated list of fields from input layer to copy to the new layer.
Comma-separated list of fields from input layer to copy to the new layer
(or to exclude if :option:`--exclude` is specified).

Field names with spaces, commas or double-quote
should be surrounded with a starting and ending double-quote character, and
Expand Down Expand Up @@ -97,6 +100,11 @@ Standard options
When specifying :option:`--ignore-missing-fields`, only a warning is
emitted and the non existing fields are just ignored.

.. option:: --exclude

Modifies the behavior of the algorithm such that all fields are selected,
except the ones mentioned by :option:`--fields`.


Advanced options
++++++++++++++++
Expand All @@ -114,3 +122,11 @@ Examples
.. code-block:: bash
$ gdal vector select in.shp out.gpkg "EAS_ID,_ogr_geometry_" --overwrite
.. example::
:title: Remove sensitive fields from a layer
.. code-block:: bash
$ gdal vector select in.shp out.gpkg --exclude "name,surname,address" --overwrite

0 comments on commit 32d4ef0

Please sign in to comment.