Skip to content

Commit bf9458e

Browse files
libailin authored and zoudaokoulife committed
[Feature-#1889][ftp] read support column index and sheetNO
1 parent 994c2d1 commit bf9458e

File tree

7 files changed

+89
-4
lines changed

7 files changed

+89
-4
lines changed

Diff for: chunjun-connectors/chunjun-connector-ftp/src/main/java/com/dtstack/chunjun/connector/ftp/client/excel/ExcelReaderExecutor.java

+19-2
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,39 @@
1818

1919
package com.dtstack.chunjun.connector.ftp.client.excel;
2020

21+
import com.dtstack.chunjun.connector.ftp.extend.ftp.IFormatConfig;
22+
2123
import com.alibaba.excel.ExcelReader;
24+
import com.alibaba.excel.read.metadata.ReadSheet;
25+
26+
import java.util.ArrayList;
27+
import java.util.List;
2228

2329
public class ExcelReaderExecutor implements Runnable {
2430

2531
private final ExcelReader reader;
2632
private ExcelSubExceptionCarrier ec;
33+
private IFormatConfig config;
2734

28-
public ExcelReaderExecutor(ExcelReader reader, ExcelSubExceptionCarrier ec) {
35+
public ExcelReaderExecutor(
36+
ExcelReader reader, ExcelSubExceptionCarrier ec, IFormatConfig config) {
2937
this.reader = reader;
3038
this.ec = ec;
39+
this.config = config;
3140
}
3241

3342
@Override
3443
public void run() {
3544
try {
36-
reader.readAll();
45+
if (config.getSheetNo() != null) {
46+
List<ReadSheet> readSheetList = new ArrayList<>();
47+
for (int i = 0; i < config.getSheetNo().size(); i++) {
48+
readSheetList.add(new ReadSheet(config.getSheetNo().get(i)));
49+
}
50+
reader.read(readSheetList);
51+
} else {
52+
reader.readAll();
53+
}
3754
} catch (Exception e) {
3855
ec.setThrowable(e);
3956
} finally {

Diff for: chunjun-connectors/chunjun-connector-ftp/src/main/java/com/dtstack/chunjun/connector/ftp/config/FtpConfig.java

+7
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import lombok.Data;
2626
import lombok.EqualsAndHashCode;
2727

28+
import java.util.List;
2829
import java.util.Map;
2930

3031
import static com.dtstack.chunjun.connector.ftp.config.ConfigConstants.DEFAULT_FTP_PORT;
@@ -91,4 +92,10 @@ public void setDefaultPort() {
9192
port = DEFAULT_FTP_PORT;
9293
}
9394
}
95+
96+
/**
 * Worksheet numbers to read. Populated from the "sheet-no" option and copied into
 * IFormatConfig by FtpInputFormat#buildIFormatConfig; may be null when not configured.
 */
97+
public List<Integer> sheetNo;
98+
99+
/**
 * Source column index for each declared field, in field order. Populated from the
 * "column-index" option; may be null, in which case each field uses its own position.
 */
100+
public List<Integer> columnIndex;
94101
}

Diff for: chunjun-connectors/chunjun-connector-ftp/src/main/java/com/dtstack/chunjun/connector/ftp/extend/ftp/IFormatConfig.java

+3
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import lombok.Data;
2222

2323
import java.io.Serializable;
24+
import java.util.List;
2425
import java.util.Map;
2526

2627
@Data
@@ -45,4 +46,6 @@ public class IFormatConfig implements Serializable {
4546

4647
/*
 * Column delimiter.
 * NOTE(review): the original comment read "row delimiter", which does not match the
 * field name - confirm which meaning is intended.
 */
4748
private String columnDelimiter;
49+
50+
/** Worksheet numbers to read; ExcelReaderExecutor reads all sheets when this is null. */
public List<Integer> sheetNo;
4851
}

Diff for: chunjun-connectors/chunjun-connector-ftp/src/main/java/com/dtstack/chunjun/connector/ftp/iformat/ExcelFileFormat.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ public void open(File file, InputStream inputStream, IFormatConfig config) {
8888
.namingPattern("excel-schedule-pool-%d")
8989
.daemon(false)
9090
.build());
91-
ExcelReaderExecutor executor = new ExcelReaderExecutor(reader, ec);
91+
ExcelReaderExecutor executor = new ExcelReaderExecutor(reader, ec, config);
9292
executorService.execute(executor);
9393
}
9494

Diff for: chunjun-connectors/chunjun-connector-ftp/src/main/java/com/dtstack/chunjun/connector/ftp/options/FtpOptions.java

+10
Original file line numberDiff line numberDiff line change
@@ -96,4 +96,14 @@ public class FtpOptions extends BaseFileOptions {
9696
.stringType()
9797
.noDefaultValue()
9898
.withDescription("compress type");
99+
/**
 * Optional "sheet-no" option: a string of sheet numbers separated by commas, with no
 * default value. FtpDynamicTableFactory splits it on "," and parses each piece as an
 * integer.
 */
public static final ConfigOption<String> SHEET_NO =
100+
ConfigOptions.key("sheet-no")
101+
.stringType()
102+
.noDefaultValue()
103+
.withDescription("sheet no, Multiple numbers separated by commas(,)");
104+
/**
 * Optional "column-index" option: a string of column indexes separated by commas, with
 * no default value. FtpDynamicTableFactory requires one index per declared field and
 * assigns them to the fields in order.
 */
public static final ConfigOption<String> COLUMN_INDEX =
105+
ConfigOptions.key("column-index")
106+
.stringType()
107+
.noDefaultValue()
108+
.withDescription("column index, Multiple numbers separated by commas(,)");
99109
}

Diff for: chunjun-connectors/chunjun-connector-ftp/src/main/java/com/dtstack/chunjun/connector/ftp/source/FtpInputFormat.java

+19
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,24 @@ protected RowData nextRecordInternal(RowData rowData) throws ReadRecordException
173173
}
174174

175175
if (rowConverter instanceof FtpSqlConverter) {
176+
// 处理字段配置了对应的列索引
177+
if (ftpConfig.getColumnIndex() != null) {
178+
List<FieldConfig> columns = ftpConfig.getColumn();
179+
String[] fieldsData = new String[columns.size()];
180+
for (int i = 0; i < CollectionUtils.size(columns); i++) {
181+
FieldConfig fieldConfig = columns.get(i);
182+
if (fieldConfig.getIndex() >= fields.length) {
183+
String errorMessage =
184+
String.format(
185+
"The column index is greater than the data size."
186+
+ " The current column index is [%s], but the data size is [%s]. Data loss may occur.",
187+
fieldConfig.getIndex(), fields.length);
188+
throw new IllegalArgumentException(errorMessage);
189+
}
190+
fieldsData[i] = fields[fieldConfig.getIndex()];
191+
}
192+
fields = fieldsData;
193+
}
176194
// 解决数据里包含特殊符号(逗号、换行符)
177195
rowData = rowConverter.toInternal(fields);
178196
} else if (rowConverter instanceof FtpSyncConverter) {
@@ -278,6 +296,7 @@ private IFormatConfig buildIFormatConfig(FtpConfig ftpConfig) {
278296
iFormatConfig.setFetchMaxSize(ftpConfig.getMaxFetchSize());
279297
iFormatConfig.setParallelism(ftpConfig.getParallelism());
280298
iFormatConfig.setColumnDelimiter(ftpConfig.getColumnDelimiter());
299+
iFormatConfig.setSheetNo(ftpConfig.getSheetNo());
281300

282301
return iFormatConfig;
283302
}

Diff for: chunjun-connectors/chunjun-connector-ftp/src/main/java/com/dtstack/chunjun/connector/ftp/table/FtpDynamicTableFactory.java

+30-1
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,11 @@
5050
import org.apache.commons.lang3.StringUtils;
5151

5252
import java.util.ArrayList;
53+
import java.util.Arrays;
5354
import java.util.HashSet;
5455
import java.util.List;
5556
import java.util.Set;
57+
import java.util.stream.Collectors;
5658

5759
public class FtpDynamicTableFactory implements DynamicTableSourceFactory, DynamicTableSinkFactory {
5860

@@ -99,6 +101,20 @@ private static FtpConfig getFtpConfByOptions(ReadableConfig config) {
99101
if (config.get(FtpOptions.FIRST_LINE_HEADER) != null) {
100102
ftpConfig.setFirstLineHeader(config.get(FtpOptions.FIRST_LINE_HEADER));
101103
}
104+
// Optional "sheet-no": comma-separated sheet numbers, e.g. "0,1" or "0, 1".
if (StringUtils.isNotBlank(config.get(FtpOptions.SHEET_NO))) {
    List<Integer> sheetNo =
            Arrays.stream(config.get(FtpOptions.SHEET_NO).split(","))
                    // Integer.parseInt rejects surrounding whitespace, so strip it first.
                    .map(String::trim)
                    .map(Integer::parseInt)
                    .collect(Collectors.toList());
    ftpConfig.setSheetNo(sheetNo);
}
// Optional "column-index": comma-separated source column indexes, one per declared field.
if (StringUtils.isNotBlank(config.get(FtpOptions.COLUMN_INDEX))) {
    List<Integer> columnIndex =
            Arrays.stream(config.get(FtpOptions.COLUMN_INDEX).split(","))
                    // Same whitespace tolerance as for "sheet-no".
                    .map(String::trim)
                    .map(Integer::parseInt)
                    .collect(Collectors.toList());
    ftpConfig.setColumnIndex(columnIndex);
}
102118
return ftpConfig;
103119
}
104120

@@ -118,13 +134,24 @@ public DynamicTableSource createDynamicTableSource(Context context) {
118134

119135
List<Column> columns = resolvedSchema.getColumns();
120136
FtpConfig ftpConfig = getFtpConfByOptions(config);
137+
if (ftpConfig.getColumnIndex() != null
138+
&& columns.size() != ftpConfig.getColumnIndex().size()) {
139+
throw new IllegalArgumentException(
140+
String.format(
141+
"The number of fields (%s) is inconsistent with the number of indexes (%s).",
142+
columns.size(), ftpConfig.getColumnIndex().size()));
143+
}
121144
List<FieldConfig> columnList = new ArrayList<>(columns.size());
122145
for (Column column : columns) {
123146
FieldConfig field = new FieldConfig();
124147
field.setName(column.getName());
125148
field.setType(
126149
TypeConfig.fromString(column.getDataType().getLogicalType().asSummaryString()));
127-
field.setIndex(columns.indexOf(column));
150+
int index =
151+
ftpConfig.getColumnIndex() != null
152+
? ftpConfig.getColumnIndex().get(columns.indexOf(column))
153+
: columns.indexOf(column);
154+
field.setIndex(index);
128155
columnList.add(field);
129156
}
130157
ftpConfig.setColumn(columnList);
@@ -199,6 +226,8 @@ public Set<ConfigOption<?>> optionalOptions() {
199226
options.add(FtpOptions.COMPRESS_TYPE);
200227
options.add(BaseFileOptions.NEXT_CHECK_ROWS);
201228
options.add(BaseFileOptions.WRITE_MODE);
229+
options.add(FtpOptions.SHEET_NO);
230+
options.add(FtpOptions.COLUMN_INDEX);
202231
return options;
203232
}
204233
}

0 commit comments

Comments
 (0)