Skip to content

Commit d2cc6e7

Browse files
committed
Merge branch 'main' into develop/hsc/clean
# Conflicts:
#	backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/CleaningTask.java
#	scripts/db/data-cleaning-init.sql
2 parents f31fc0d + 91b1e08 commit d2cc6e7

File tree

53 files changed

+771
-440
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

53 files changed

+771
-440
lines changed

README-zh.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
![GitHub Forks](https://img.shields.io/github/forks/ModelEngine-Group/DataMate)
99
![GitHub Issues](https://img.shields.io/github/issues/ModelEngine-Group/DataMate)
1010
![GitHub License](https://img.shields.io/github/license/ModelEngine-Group/datamate-docs)
11+
[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/ModelEngine-Group/DataMate)
1112

1213
**DataMate是面向模型微调与RAG检索的企业级数据处理平台,支持数据归集、数据管理、算子市场、数据清洗、数据合成、数据标注、数据评估、知识生成等核心功能。**
1314

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
![GitHub Forks](https://img.shields.io/github/forks/ModelEngine-Group/DataMate)
99
![GitHub Issues](https://img.shields.io/github/issues/ModelEngine-Group/DataMate)
1010
![GitHub License](https://img.shields.io/github/license/ModelEngine-Group/datamate-docs)
11+
[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/ModelEngine-Group/DataMate)
1112

1213
**DataMate is an enterprise-level data processing platform for model fine-tuning and RAG retrieval, supporting core
1314
functions such as data collection, data management, operator marketplace, data cleaning, data synthesis, data

backend/api-gateway/src/main/java/com/datamate/gateway/common/filter/UserContextFilter.java renamed to backend/api-gateway/src/main/java/com/datamate/gateway/common/filter/AuthFilter.java

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import com.fasterxml.jackson.databind.ObjectMapper;
88
import lombok.RequiredArgsConstructor;
99
import lombok.extern.slf4j.Slf4j;
10+
import org.apache.commons.lang3.StringUtils;
1011
import org.springframework.beans.factory.annotation.Value;
1112
import org.springframework.cloud.gateway.filter.GatewayFilterChain;
1213
import org.springframework.cloud.gateway.filter.GlobalFilter;
@@ -22,17 +23,19 @@
2223
import java.nio.charset.StandardCharsets;
2324

2425
/**
25-
* 用户信息过滤器
26+
* 鉴权过滤器
2627
*
2728
*/
2829
@Slf4j
2930
@Component
3031
@RequiredArgsConstructor
31-
public class UserContextFilter implements GlobalFilter {
32+
public class AuthFilter implements GlobalFilter {
3233
private static final String AUTH_HEADER = "Authorization";
3334

3435
private static final String TOKEN_PREFIX = "Bearer ";
3536

37+
private static final String USER_HEADER = "User";
38+
3639
private final UserService userService;
3740

3841
@Value("${datamate.jwt.enable:false}")
@@ -55,10 +58,22 @@ public Mono<Void> filter(ServerWebExchange exchange, GatewayFilterChain chain) {
5558
return sendUnauthorizedResponse(exchange);
5659
}
5760
String token = authHeader.substring(TOKEN_PREFIX.length());
58-
if (!userService.validateToken(token)) {
61+
String user = userService.validateToken(token);
62+
if (StringUtils.isBlank(user)) {
5963
return sendUnauthorizedResponse(exchange);
6064
}
61-
return chain.filter(exchange);
65+
// 4. 创建新的请求
66+
ServerHttpRequest mutatedRequest = request.mutate()
67+
.headers(httpHeaders -> {
68+
// 或者直接操作headers
69+
httpHeaders.add(USER_HEADER, user);
70+
})
71+
.build();
72+
// 5. 使用新的请求创建新的exchange
73+
ServerWebExchange mutatedExchange = exchange.mutate()
74+
.request(mutatedRequest)
75+
.build();
76+
return chain.filter(mutatedExchange);
6277
} catch (Exception e) {
6378
log.error("get current user info error", e);
6479
return sendUnauthorizedResponse(exchange);

backend/api-gateway/src/main/java/com/datamate/gateway/domain/service/UserService.java

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
44
import com.datamate.gateway.domain.entity.User;
55
import com.datamate.gateway.domain.repository.UserRepository;
6-
import io.jsonwebtoken.JwtException;
7-
import io.jsonwebtoken.Jwts;
8-
import io.jsonwebtoken.SignatureAlgorithm;
6+
import io.jsonwebtoken.*;
97
import io.jsonwebtoken.security.Keys;
108
import lombok.RequiredArgsConstructor;
119
import org.springframework.beans.factory.annotation.Value;
@@ -26,6 +24,8 @@
2624
@Service
2725
@RequiredArgsConstructor
2826
public class UserService {
27+
private static final String SYSTEM_USER = "system";
28+
2929
private final UserRepository userRepository;
3030

3131
@Value("${datamate.jwt.expiration-seconds:3600}")
@@ -70,12 +70,12 @@ private String generateToken(User user) {
7070
.compact();
7171
}
7272

73-
public boolean validateToken(String token) {
73+
public String validateToken(String token) {
7474
try {
75-
Jwts.parser().setSigningKey(secret.getBytes()).parseClaimsJws(token);
76-
return true;
75+
Jws<Claims> claimsJws = Jwts.parserBuilder().setSigningKey(Keys.hmacShaKeyFor(secret.getBytes(StandardCharsets.UTF_8))).build().parseClaimsJws(token);
76+
return claimsJws.getBody().getSubject();
7777
} catch (JwtException | IllegalArgumentException ex) {
78-
return false;
78+
return null;
7979
}
8080
}
8181

@@ -89,7 +89,7 @@ public Optional<User> register(RegisterRequest registerRequest) {
8989
// Check if username already exists
9090
LambdaQueryWrapper<User> usernameQuery = new LambdaQueryWrapper<>();
9191
usernameQuery.eq(User::getUsername, registerRequest.getUsername());
92-
if (userRepository.getOne(usernameQuery) != null) {
92+
if (userRepository.getOne(usernameQuery) != null || SYSTEM_USER.equals(registerRequest.getUsername())) {
9393
return Optional.empty();
9494
}
9595

backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/CleaningTask.java

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import com.baomidou.mybatisplus.annotation.TableName;
44
import com.datamate.cleaning.common.enums.CleaningTaskStatusEnum;
5+
import com.datamate.common.domain.model.base.BaseEntity;
56
import lombok.Getter;
67
import lombok.Setter;
78

@@ -14,9 +15,7 @@
1415
@Getter
1516
@Setter
1617
@TableName(value = "t_clean_task", autoResultMap = true)
17-
public class CleaningTask {
18-
private String id;
19-
18+
public class CleaningTask extends BaseEntity<String> {
2019
private String name;
2120

2221
private String description;
@@ -39,8 +38,6 @@ public class CleaningTask {
3938

4039
private Integer retryCount;
4140

42-
private LocalDateTime createdAt;
43-
4441
private LocalDateTime startedAt;
4542

4643
private LocalDateTime finishedAt;

backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/CleaningResultMapper.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22

33
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
44
import com.datamate.cleaning.domain.model.entity.CleaningResult;
5+
import com.datamate.common.infrastructure.config.IgnoreDataScopeAnnotation;
56
import org.apache.ibatis.annotations.Mapper;
67

78
@Mapper
9+
@IgnoreDataScopeAnnotation
810
public interface CleaningResultMapper extends BaseMapper<CleaningResult> {
911
}

backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/CleaningTemplateMapper.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@
55
import com.baomidou.mybatisplus.core.toolkit.Constants;
66
import com.datamate.cleaning.domain.model.entity.TemplateWithInstance;
77
import com.datamate.cleaning.domain.model.entity.CleaningTemplate;
8+
import com.datamate.common.infrastructure.config.IgnoreDataScopeAnnotation;
89
import org.apache.ibatis.annotations.Mapper;
910
import org.apache.ibatis.annotations.Param;
1011
import org.apache.ibatis.annotations.Select;
1112

1213
import java.util.List;
1314

1415
@Mapper
16+
@IgnoreDataScopeAnnotation
1517
public interface CleaningTemplateMapper extends BaseMapper<CleaningTemplate> {
1618
@Select("SELECT t.id AS id, name, description, created_at, updated_at, created_by, operator_id, op_index, " +
1719
"settings_override FROM t_clean_template t LEFT JOIN t_operator_instance o ON t.id = o.instance_id " +

backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/OperatorInstanceMapper.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
44
import com.datamate.cleaning.domain.model.entity.OperatorInstance;
5+
import com.datamate.common.infrastructure.config.IgnoreDataScopeAnnotation;
56
import com.datamate.operator.domain.model.OperatorView;
67
import org.apache.ibatis.annotations.Mapper;
78
import org.apache.ibatis.annotations.Select;
@@ -10,6 +11,7 @@
1011

1112

1213
@Mapper
14+
@IgnoreDataScopeAnnotation
1315
public interface OperatorInstanceMapper extends BaseMapper<OperatorInstance> {
1416
@Select("SELECT o.operator_id as id, o.operator_name as name, o.description, o.version, o.inputs, o.outputs, " +
1517
"o.runtime, o.settings, o.created_at, o.updated_at, " +

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetApplicationService.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ public AllDatasetStatisticsResponse getAllDatasetStatistics() {
244244
public void processDataSourceAsync(String datasetId, String dataSourceId) {
245245
try {
246246
log.info("Initiating data source file scanning, dataset ID: {}, collection task ID: {}", datasetId, dataSourceId);
247-
List<String> filePaths = getFilePaths(dataSourceId);
247+
List<String> filePaths = getFilePaths(dataSourceId, datasetRepository.getById(datasetId));
248248
if (CollectionUtils.isEmpty(filePaths)) {
249249
return;
250250
}
@@ -255,8 +255,8 @@ public void processDataSourceAsync(String datasetId, String dataSourceId) {
255255
}
256256
}
257257

258-
private List<String> getFilePaths(String dataSourceId) {
259-
CollectionTaskDetailResponse taskDetail = collectionTaskClient.getTaskDetail(dataSourceId).getData();
258+
private List<String> getFilePaths(String dataSourceId, Dataset dataset) {
259+
CollectionTaskDetailResponse taskDetail = collectionTaskClient.getTaskDetail(dataSourceId, dataset.getCreatedBy()).getData();
260260
if (taskDetail == null) {
261261
log.warn("Fail to get collection task detail, task ID: {}", dataSourceId);
262262
return Collections.emptyList();

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ public PagedResponse<DatasetFile> getDatasetFilesWithDirectory(String datasetId,
137137

138138
return new PagedResponse<>(page, size, total, totalPages, datasetFiles);
139139
} catch (IOException e) {
140-
log.error("list dataset path error", e);
140+
log.warn("list dataset path error");
141141
return PagedResponse.of(new Page<>(page, size));
142142
}
143143
}

0 commit comments

Comments
 (0)