/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.server.resourcemanager;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FileContent;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.IssueData;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.IssueType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;

/**
 * Utility methods to launch the diagnostic collector script and turn its
 * output into web-app DAO objects. Not supported on Windows.
 */
@InterfaceAudience.Private
public final class DiagnosticsService {
  private static final Logger LOG = LoggerFactory
      .getLogger(DiagnosticsService.class);
  private static final String PYTHON_COMMAND = "python3";
  private static final String COLON = ":";
  private static final String COMMA = ",";
  // The collector script creates its output directory under /tmp and prints
  // the directory's path on stdout; the result line is located by this
  // prefix. NOTE(review): assumes the script never prints "/tmp" in any
  // other line — confirm against the script's output contract.
  private static final String OUT_DIR_PREFIX = "/tmp";
  private static final String EXECUTION_ERROR_MESSAGE = "Error occurred " +
      "during the execution of the diagnostic script with the command '{}'.";
  private static final String INCORRECT_NUMBER_OF_PARAMETERS_MESSAGE =
      "Error while parsing diagnostic option, incorrect number of " +
      "parameters. Expected 1 or 2, but got {}. Skipping this option.";
  // Lazily initialized path of the extracted collector script; tests may
  // override it through setScriptLocation().
  private static String scriptLocation;

  private DiagnosticsService() {
    // hidden constructor
  }

  /**
   * Runs the collector script in list mode and parses every printed line
   * into an {@link IssueType}.
   *
   * @return the issue types the script can diagnose (malformed lines and
   *         duplicates are skipped)
   * @throws Exception if the script cannot be started or exits with a
   *         non-zero exit code
   */
  public static CommonIssues listCommonIssues() throws Exception {
    if (Shell.WINDOWS) {
      throw new UnsupportedOperationException("Not implemented for Windows.");
    }
    CommonIssues issueTypes = new CommonIssues();
    ProcessBuilder pb = createProcessBuilder(CommandArgument.LIST_ISSUES);

    List<String> result = executeCommand(pb);
    // Was a leftover System.out.println debug statement; use the logger.
    LOG.debug("Common issue listing result: {}", result);

    for (String line : result) {
      // parseIssueType returns null for malformed lines; CommonIssues#add
      // ignores nulls, so bad lines are skipped silently.
      issueTypes.add(parseIssueType(line));
    }

    return issueTypes;
  }

  /**
   * Runs the collector script for a single issue and gathers the files it
   * produced into an {@link IssueData}.
   *
   * @param issueId the id of the issue to diagnose
   * @param args optional extra arguments forwarded to the script, may be null
   * @return the content of every file the script wrote to its output dir
   * @throws IOException if the script does not print an output directory or
   *         exits with a non-zero code
   * @throws Exception on any other execution failure
   */
  public static IssueData collectIssueData(String issueId, List<String> args)
      throws Exception {
    if (Shell.WINDOWS) {
      throw new UnsupportedOperationException("Not implemented for Windows.");
    }
    ProcessBuilder pb = createProcessBuilder(CommandArgument.COMMAND, issueId,
        args);

    LOG.info("Diagnostic process environment: {}", pb.environment());

    List<String> result = executeCommand(pb);
    Optional<String> outputDirectory = result.stream()
        .filter(e -> e.contains(OUT_DIR_PREFIX))
        .findFirst();

    if (!outputDirectory.isPresent()) {
      LOG.error(EXECUTION_ERROR_MESSAGE, pb.command());
      throw new IOException("Output directory in result not found.");
    }

    return collectIssueFilesContent(new File(outputDirectory.get()));
  }

  /**
   * Recursively reads every regular file under {@code currentDir} into an
   * {@link IssueData}, naming each entry by its path relative to its parent
   * directory. Unreadable entries are logged and skipped.
   *
   * @param currentDir root of the collector's output directory
   * @return the collected file contents; empty if the directory is missing
   */
  public static IssueData collectIssueFilesContent(File currentDir) {
    IssueData issueData = new IssueData();

    // Check existence before listing (the original listed first, making the
    // exists() branch unreachable because listFiles() already returns null
    // for a missing directory and that null is handled below).
    if (!currentDir.exists()) {
      LOG.error("Directory does not exist: {}", currentDir);
      return issueData;
    }

    File[] files = currentDir.listFiles();
    if (files == null) {
      return issueData;
    }

    for (File file : files) {
      try {
        if (file.isDirectory()) {
          issueData.getFiles().addAll(
              collectIssueFilesContent(file).getFiles());
        } else {
          String contentType = Files.probeContentType(file.toPath());
          if (contentType == null) {
            contentType = "text/plain"; // Default if not detectable
          }

          String content = new String(Files.readAllBytes(file.toPath()),
              StandardCharsets.UTF_8);
          FileContent fileContent = new FileContent();
          // Set the relative path as name
          fileContent.setFilename(
              currentDir.toPath().relativize(file.toPath()).toString());
          fileContent.setContentType(contentType);
          fileContent.setContent(content);

          issueData.getFiles().add(fileContent);
        }
      } catch (IOException e) {
        LOG.error("Failed to process {}: {}", file, e.getMessage());
      }
    }
    return issueData;
  }

  /**
   * Builds the command line for an argument-less invocation (e.g. listing).
   *
   * @param argument the collector mode to invoke
   * @return a process builder for "python3 &lt;script&gt; &lt;option&gt;"
   * @throws IOException if the bundled script cannot be extracted
   */
  @VisibleForTesting
  protected static ProcessBuilder createProcessBuilder(
      CommandArgument argument) throws IOException {
    return createProcessBuilder(argument, null, null);
  }

  /**
   * Builds the command line to run the collector script.
   *
   * @param argument the collector mode to invoke
   * @param issueId the issue id, only used for {@link CommandArgument#COMMAND}
   * @param additionalArgs extra script arguments, may be null
   * @return the prepared process builder
   * @throws IOException if the bundled script cannot be extracted
   */
  @VisibleForTesting
  protected static ProcessBuilder createProcessBuilder(
      CommandArgument argument, String issueId, List<String> additionalArgs)
      throws IOException {
    List<String> commandList =
        new ArrayList<>(Arrays.asList(PYTHON_COMMAND, getScriptLocation(),
            argument.getShortOption()));

    if (argument.equals(CommandArgument.COMMAND)) {
      commandList.add(issueId);
      if (additionalArgs != null) {
        commandList.add(CommandArgument.ARGUMENTS.getShortOption());
        commandList.addAll(additionalArgs);
      }
    }

    return new ProcessBuilder(commandList);
  }

  /**
   * Starts the process, drains stdout/stderr completely, waits for exit and
   * validates the exit code.
   *
   * @param pb the prepared process builder
   * @return every line the process wrote to stdout
   * @throws IOException on I/O failure or a non-zero exit code
   * @throws InterruptedException if the wait is interrupted (the interrupt
   *         status is restored before rethrowing)
   */
  private static List<String> executeCommand(ProcessBuilder pb)
      throws Exception {
    Process process = pb.start();
    List<String> result = new ArrayList<>();

    try (
        BufferedReader stdoutReader = new BufferedReader(
            new InputStreamReader(process.getInputStream(),
                StandardCharsets.UTF_8));
        BufferedReader stderrReader = new BufferedReader(
            new InputStreamReader(process.getErrorStream(),
                StandardCharsets.UTF_8))
    ) {
      String line;
      while ((line = stdoutReader.readLine()) != null) {
        result.add(line);
      }

      List<String> errors = new ArrayList<>();
      while ((line = stderrReader.readLine()) != null) {
        errors.add(line);
      }
      if (!errors.isEmpty()) {
        LOG.error("Python script stderr: {}", errors);
      }

      process.waitFor();
    } catch (InterruptedException e) {
      // Restore the interrupt status so upstream callers can observe it.
      Thread.currentThread().interrupt();
      LOG.error(EXECUTION_ERROR_MESSAGE, pb.command(), e);
      throw e;
    } catch (Exception e) {
      // Pass the exception as the last argument so SLF4J logs the trace.
      LOG.error(EXECUTION_ERROR_MESSAGE, pb.command(), e);
      throw e;
    }
    int exitCode = process.exitValue();
    if (exitCode != 0) {
      throw new IOException("The collector script exited with non-zero " +
          "exit code: " + exitCode);
    }

    return result;
  }

  /**
   * Parses one "name[:param1,param2,...]" line printed by the script.
   *
   * @param line one output line of the listing mode
   * @return the parsed issue type, or null for a malformed line (more than
   *         one colon)
   */
  @VisibleForTesting
  protected static IssueType parseIssueType(String line) {
    String[] issueParams = line.split(COLON);
    IssueType parsedIssueType;

    if (issueParams.length < 1 || issueParams.length > 2) {
      LOG.warn(INCORRECT_NUMBER_OF_PARAMETERS_MESSAGE,
          issueParams.length);
      return null;
    } else {
      String name = issueParams[0];
      parsedIssueType = new IssueType(name);
      if (issueParams.length == 2) {
        List<String> parameterList =
            Arrays.asList(issueParams[1].split(COMMA));
        parsedIssueType.setParameters(parameterList);
      }
    }

    return parsedIssueType;
  }

  /**
   * Overrides the script location (test hook).
   *
   * @param scriptLocationParam path of the script to run
   */
  @VisibleForTesting
  protected static void setScriptLocation(String scriptLocationParam) {
    scriptLocation = scriptLocationParam;
  }

  /**
   * Short command-line options accepted by the collector script.
   */
  enum CommandArgument {
    LIST_ISSUES("-l"),
    COMMAND("-c"),
    ARGUMENTS("-a");

    private final String shortOption;

    CommandArgument(String shortOption) {
      this.shortOption = shortOption;
    }

    /**
     * Looks up the argument for a short option string. Made static: it uses
     * no instance state and was unusable as a lookup otherwise.
     *
     * @param option a short option such as "-l"
     * @return the matching argument, or null if none matches
     */
    public static CommandArgument fromString(String option) {
      for (CommandArgument arg : CommandArgument.values()) {
        if (arg.shortOption.equals(option)) {
          return arg;
        }
      }
      return null;
    }

    public String getShortOption() {
      return shortOption;
    }
  }

  /**
   * Lazily extracts the bundled collector script to a temp file and caches
   * its absolute path.
   *
   * @return path of the runnable script
   * @throws FileNotFoundException if the script resource is missing
   * @throws IOException if the extraction fails
   */
  private static String getScriptLocation() throws IOException {
    if (scriptLocation != null) {
      return scriptLocation;
    }

    // try-with-resources: the original leaked this stream.
    try (InputStream in = DiagnosticsService.class.getClassLoader()
        .getResourceAsStream("diagnostics/diagnostics_collector.py")) {
      if (in == null) {
        throw new FileNotFoundException(
            "Resource diagnostics/diagnostics_collector.py not found in " +
            "classpath");
      }

      File tempScript = File.createTempFile("diagnostics_collector", ".py");
      // Best-effort cleanup of the extracted copy on JVM shutdown.
      tempScript.deleteOnExit();
      Files.copy(in, tempScript.toPath(),
          StandardCopyOption.REPLACE_EXISTING);
      if (!tempScript.setExecutable(true)) {
        LOG.warn("Could not mark {} as executable.", tempScript);
      }

      scriptLocation = tempScript.getAbsolutePath();
    }
    return scriptLocation;
  }
}
*/ public static final String SCHEDULER = "/scheduler"; @@ -219,6 +226,8 @@ public final class RMWSConsts { // ----------------QueryParams for RMWebServiceProtocol---------------- + public static final String ISSUEID = "issueId"; + public static final String ISSUEARGS = "args"; public static final String TIME = "time"; public static final String STATES = "states"; public static final String NODEID = "nodeId"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServiceProtocol.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServiceProtocol.java index 47de8ee591afd..0951c9f5e026c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServiceProtocol.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServiceProtocol.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp; import java.io.IOException; +import java.util.List; import java.util.Set; import javax.servlet.http.HttpServletRequest; @@ -47,6 +48,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.DelegationToken; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.LabelsToNodesInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo; @@ -112,6 +114,25 @@ 
public interface RMWebServiceProtocol { */ ClusterMetricsInfo getClusterMetricsInfo(); + /** + * This method retrieves the common diagnosable issue list, and it is + * reachable by using {@link RMWSConsts#COMMON_ISSUE_LIST}. + * + * @return the list of available diagnostic cases + */ + CommonIssues getCommonIssueList(); + + /** + * This method retrieves the diagnostic information for the selected issue, + * and it is reachable by using {@link RMWSConsts#COMMON_ISSUE_COLLECT}. + * + * @param issueId the selected issue's ID. It is a QueryParam. + * @param args the necessary arguments for diagnosing the issue. + * It is a QueryParam. + * @return the associated diagnostic information to the selected issue + */ + Response getCommonIssueData(String issueId, List args); + /** * This method retrieves the current scheduler status, and it is reachable by * using {@link RMWSConsts#SCHEDULER}. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java index b6ea2d438c0b4..329d49950213d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java @@ -137,6 +137,7 @@ import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest; import org.apache.hadoop.yarn.server.resourcemanager.AdminService; +import org.apache.hadoop.yarn.server.resourcemanager.DiagnosticsService; import 
org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger; import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; @@ -177,6 +178,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.DelegationToken; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FairSchedulerInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FifoSchedulerInfo; @@ -223,6 +225,7 @@ import org.apache.hadoop.yarn.webapp.BadRequestException; import org.apache.hadoop.yarn.webapp.ForbiddenException; import org.apache.hadoop.yarn.webapp.NotFoundException; +import org.apache.hadoop.yarn.webapp.WebAppException; import org.apache.hadoop.yarn.webapp.util.WebAppUtils; import org.apache.hadoop.yarn.webapp.dao.ConfInfo; import org.apache.hadoop.yarn.webapp.dao.SchedConfUpdateInfo; @@ -408,6 +411,42 @@ public ClusterMetricsInfo getClusterMetricsInfo() { return new ClusterMetricsInfo(this.rm); } + @GET + @Path(RMWSConsts.COMMON_ISSUE_LIST) + @Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8, + MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 }) + @Override + public CommonIssues getCommonIssueList() { + initForReadableEndpoints(); + try { + return DiagnosticsService.listCommonIssues(); + } catch (Exception e) { + throw new WebAppException("Error collecting the common " + + "issue types. Error message: " + e.getMessage() + ". 
" + + "For more information please check the ResourceManager logs."); + } + } + + @GET + @Path(RMWSConsts.COMMON_ISSUE_COLLECT) + @Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8, + MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 }) + @Override + public Response getCommonIssueData( + @QueryParam(RMWSConsts.ISSUEID) String issueId, + @QueryParam(RMWSConsts.ISSUEARGS) List args) { + initForReadableEndpoints(); + try { + return Response.status(Status.OK) + .entity(DiagnosticsService.collectIssueData(issueId, args)) + .build(); + } catch (Exception e) { + throw new WebAppException("Error collecting the selected " + + "issue data. Error message: " + e.getMessage() + ". " + + "For more information please check the ResourceManager logs."); + } + } + @GET @Path(RMWSConsts.SCHEDULER) @Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/CommonIssues.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/CommonIssues.java new file mode 100644 index 0000000000000..5ccf36eba7bd5 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/CommonIssues.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.webapp.dao; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlRootElement; +import java.util.ArrayList; +import java.util.List; + +@XmlRootElement(name = "commonIssues") +@XmlAccessorType(XmlAccessType.FIELD) +public class CommonIssues { + private List issue = new ArrayList<>(); + + public CommonIssues() {} + + public void add(IssueType type) { + if (type != null && + issue.stream().noneMatch(e-> e.getName().equals(type.getName()))) { + issue.add(type); + } + } + + public List getIssueList() { + return issue; + } + + public void addAll(List issueTypes) { + issue.addAll(issueTypes); + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FileContent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FileContent.java new file mode 100644 index 0000000000000..1b7a6a2ffe8bd --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FileContent.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.webapp.dao; + +import javax.xml.bind.annotation.XmlRootElement; + +@XmlRootElement +public class FileContent { + private String filename; + private String contentType; + private String content; + + public String getFilename() { return filename; } + public void setFilename(String filename) { this.filename = filename; } + + public String getContentType() { return contentType; } + public void setContentType(String contentType) { this.contentType = contentType; } + + public String getContent() { return content; } + public void setContent(String content) { this.content = content; } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/IssueData.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/IssueData.java new file mode 100644 index 0000000000000..d205070be9a35 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/IssueData.java @@ -0,0 +1,36 @@ +/** + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.webapp.dao; + +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlRootElement; +import java.util.ArrayList; +import java.util.List; + +@XmlRootElement +public class IssueData { + private List files = new ArrayList<>(); + + @XmlElement(name = "file") // To make the endpoint result less ambiguous + public List getFiles() { + return files; + } + + public void setFiles(List files) { this.files = files; } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/IssueType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/IssueType.java new file mode 100644 index 0000000000000..27f96a0ae2b5d --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/IssueType.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.webapp.dao; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlRootElement; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +@XmlRootElement(name = "issue") +@XmlAccessorType(XmlAccessType.FIELD) +public class IssueType { + private String name; + private List parameters = new ArrayList<>(); + + public IssueType() {} + + public IssueType(String name) { + this.name = name; + this.parameters = Collections.emptyList(); + } + + public IssueType(String name, List parameters) { + this.name = name; + this.parameters = new ArrayList<>(parameters); + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public List getParameters() { + return parameters; + } + + public void setParameters(List parameters) { + this.parameters = parameters; + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/DiagnosticsServiceTest.java 
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.server.resourcemanager;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.IssueType;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import static org.junit.Assert.fail;

/**
 * Unit tests for {@link DiagnosticsService}, driven by the fake collector
 * script under src/test/resources/diagnostics.
 *
 * Fixes over the original: the active tests called the non-existent
 * DiagnosticsService#collectIssueDataPath (the service method is
 * collectIssueData), large blocks of commented-out dead tests were removed
 * together with the constants only they referenced, and the Windows guard
 * now asserts the concrete expected exception type.
 */
public class DiagnosticsServiceTest {
  private static final String ISSUE_NAME_APP_DIAGNOSTIC =
      "application_diagnostic";
  private static final String ISSUE_NAME_APP_FAILED = "application_failed";
  private static final String ISSUE_NAME_SCHED_ISSUE =
      "scheduler_related_issue";
  private static final String ISSUE_ARG_APP_ID = "appId";
  private static final String ISSUE_ARG_NODE_ID = "nodeId";
  private static final String COLON = ":";
  private static final String COMMA = ",";

  @Before
  public void setUp() {
    DiagnosticsService.setScriptLocation("src/test/resources/diagnostics"
        + "/diagnostics_collector_test.py");
    handleWindowsRuntime();
  }

  @Test
  public void testListCommonIssuesValidCaseWithOptionsToBeSkipped()
      throws Exception {
    // The test script contains two invalid options: one with an ambiguous
    // name and one with too many parameters. These should be skipped
    // silently.
    CommonIssues commonIssues = DiagnosticsService.listCommonIssues();

    Assert.assertEquals(2, commonIssues.getIssueList().size());
    assertIssueEquality(ISSUE_NAME_APP_DIAGNOSTIC,
        Collections.singletonList(ISSUE_ARG_APP_ID),
        commonIssues.getIssueList().get(0));

    assertIssueEquality(ISSUE_NAME_SCHED_ISSUE,
        Collections.emptyList(),
        commonIssues.getIssueList().get(1));
  }

  @Test(expected = IOException.class)
  public void testListCommonIssuesScriptMissing() throws Exception {
    DiagnosticsService.setScriptLocation("/src/invalidLocation/script.py");
    DiagnosticsService.listCommonIssues();
  }

  @Test(expected = IOException.class)
  public void testCollectIssueDataPathInvalidOutputMissingOutputDir()
      throws Exception {
    // invalid case: the script doesn't print out the correct output
    // directory
    DiagnosticsService.collectIssueData(ISSUE_NAME_SCHED_ISSUE, null);
  }

  @Test(expected = IOException.class)
  public void testCollectIssueDataPathScriptMissing() throws Exception {
    DiagnosticsService.setScriptLocation("/src/invalidLocation/script.py");
    DiagnosticsService.collectIssueData(ISSUE_NAME_APP_DIAGNOSTIC, null);
  }

  @Test
  public void testParseIssueTypeValidCases() {
    // valid case: name, no parameters
    String line = ISSUE_NAME_APP_FAILED;

    assertIssueEquality(ISSUE_NAME_APP_FAILED, Collections.emptyList(),
        DiagnosticsService.parseIssueType(line));

    // valid case: name, one parameter
    line = ISSUE_NAME_APP_FAILED + COLON + ISSUE_ARG_APP_ID;

    assertIssueEquality(ISSUE_NAME_APP_FAILED,
        Collections.singletonList(ISSUE_ARG_APP_ID),
        DiagnosticsService.parseIssueType(line));

    // valid case: name, two parameters
    line = ISSUE_NAME_APP_FAILED + COLON + ISSUE_ARG_APP_ID
        + COMMA + ISSUE_ARG_NODE_ID;

    assertIssueEquality(ISSUE_NAME_APP_FAILED,
        Arrays.asList(ISSUE_ARG_APP_ID, ISSUE_ARG_NODE_ID),
        DiagnosticsService.parseIssueType(line));
  }

  @Test
  public void testParseIssueTypeInvalidCases() {
    // invalid case: too many values
    String line = ISSUE_NAME_APP_FAILED + COLON + ISSUE_NAME_APP_FAILED
        + COLON + ISSUE_NAME_APP_FAILED;

    IssueType issueType = DiagnosticsService.parseIssueType(line);
    Assert.assertNull(issueType);
  }

  /** Asserts name and parameter multiset equality for a parsed issue. */
  private void assertIssueEquality(String expectedIssueName,
      List<String> expectedParams,
      IssueType actualIssue) {
    Assert.assertEquals(expectedIssueName,
        actualIssue.getName());
    Assert.assertEquals(expectedParams.size(),
        actualIssue.getParameters().size());
    Assert.assertTrue(CollectionUtils.isEqualCollection(
        expectedParams, actualIssue.getParameters()));
  }

  /**
   * On Windows the service must refuse to run; any other exception type
   * means something else is broken, so it now fails the test (the original
   * accepted every Exception as "expected").
   */
  private void handleWindowsRuntime() {
    if (Shell.WINDOWS) {
      try {
        DiagnosticsService.listCommonIssues();
        fail("On Windows listCommonIssues should throw "
            + "UnsupportedOperationException");
      } catch (UnsupportedOperationException e) {
        // Expected on Windows.
      } catch (Exception e) {
        fail("Unexpected exception type: " + e);
      }
    }
  }
}
+from __future__ import print_function + +import argparse +import sys, os +import subprocess +from datetime import datetime, timedelta +from urllib import request, error +import xml.etree.ElementTree as ET +import re +import time + +TEMP_DIR = "/tmp" +HADOOP_CONF_DIR = "/etc/hadoop" +YARN_SITE_XML = "yarn-site.xml" +RM_ADDRESS_PROPERTY_NAME = "yarn.resourcemanager.webapp.address" + +RM_LOG_REGEX = r"(?<=\")\/logs.+?RESOURCEMANAGER.+?(?=\")" +NM_LOG_REGEX = r"(?<=\")\/logs.+?NODEMANAGER.+?(?=\")" +INPUT_TIME_FORMAT = '%a %b %d %H:%M:%S %Z %Y' # e.g. Wed May 28 07:35:39 UTC 2025 +OUTPUT_TIME_FORMAT = '%Y-%m-%d %H:%M:%S,%f' # e.g. 2025-05-28 11:57:05,435 +OUTPUT_TIME_FORMAT_WITHOUT_SECOND = '%Y-%m-%d %H:%M' # e.g. 2025-05-28 11:57 +NUMBER_OF_JSTACK = 3 + + +def application_diagnostic(): + """ + Application Logs, Application Info, Application Attempts + Multiple JStack of Hanging Containers and NodeManager + ResourceManager logs during job duration. + NodeManager logs from NodeManager where hanging containers of jobs run during the duration of containers. 
+ """ + + if args.arguments is None or len(args.arguments) == 0: + print("Missing application or job id, exiting...") + sys.exit(os.EX_USAGE) + + app_id = args.arguments[0] + + output_path = create_output_dir(os.path.join(TEMP_DIR, app_id)) + + # Get JStack of the hanging containers + nm_address = get_nodemanager_address(app_id) + app_jstack = create_request("http://{}/ws/v1/node/apps/{}/jstack".format(nm_address, app_id), False) + write_output(output_path, "application_jstack", app_jstack) + + # Get JStack of the hanging NodeManager + nm_jstack = create_request("http://{}/ws/v1/node/jstack".format(nm_address), False) + write_output(output_path, "nm_{}_jstack".format(nm_address), nm_jstack) + + # Get application info + app_info= create_request("http://{}/ws/v1/cluster/apps/{}".format(RM_ADDRESS, app_id)) + write_output(output_path, "application_info", app_info) + + # Get application attempts + app_attempts = create_request("http://{}/ws/v1/cluster/apps/{}/appattempts".format(RM_ADDRESS, app_id)) + write_output(output_path, "application_attempts", app_attempts) + + # Get start_time and end_time of the application + start_time, end_time = get_application_time(app_info) + + # Get RM log + log_address = get_node_log_address(RM_ADDRESS, RM_LOG_REGEX) + write_output(os.path.join(output_path, "node_log"), "resourcemanager_log", + filter_node_log(log_address, start_time, end_time)) + + # Get NodeManager logs in the duration of containers belonging to app_id + if "amHostHttpAddress" in app_info: + app_info = ET.fromstring(app_info) + nm_address = app_info.find("amHostHttpAddress").text + log_address = get_node_log_address(nm_address, NM_LOG_REGEX) + write_output(os.path.join(output_path, "node_log"), "nodemanager_log", get_container_log(log_address, app_id)) + + # Get application log + command = run_cmd_and_save_output(os.path.join(output_path, "app_logs"), app_id, "yarn", "logs", "-applicationId", + app_id) # TODO user permission? 
+ + command.communicate() + return output_path + + +def scheduler_related_issue(): + """ + ResourceManager Scheduler Logs with DEBUG enabled for 2 minutes. + Multiple Jstack of ResourceManager + YARN and Scheduler Configuration + Cluster Scheduler API /ws/v1/cluster/scheduler and Cluster Nodes API /ws/v1/cluster/nodes response + Scheduler Activities /ws/v1/cluster/scheduler/bulk-activities response + """ + output_path = create_output_dir(os.path.join(TEMP_DIR, "scheduler_related_issue" + str(time.time()).split(".")[0])) + + # Multiple JStack of ResourceManager + rm_pids = get_resourcemanager_pid() + jstacks_output = get_multiple_jstack(rm_pids) + write_output(output_path, "jstacks_resourcemanager", jstacks_output) + + # Get Cluster Scheduler Info + scheduler_info = create_request("http://{}/ws/v1/cluster/scheduler".format(RM_ADDRESS)) + write_output(output_path, "scheduler_info", scheduler_info) + + # Get Cluster Nodes Info + nodes_info = create_request("http://{}/ws/v1/cluster/nodes".format(RM_ADDRESS)) + write_output(output_path, "nodemanager_info", nodes_info) + + # Get Scheduler Activities + scheduler_activities = create_request("http://{}/ws/v1/cluster/scheduler/bulk-activities".format(RM_ADDRESS)) + write_output(output_path, "scheduler_activities", scheduler_activities) + + # Get Scheduler Configuration + scheduler_config = create_request("http://{}/ws/v1/cluster/scheduler-conf".format(RM_ADDRESS)) + write_output(output_path, "scheduler_configuration", scheduler_config) + + # Get YARN configuration yarn-site.xml + yarn_conf = run_command("cat", os.path.join(HADOOP_CONF_DIR, YARN_SITE_XML)) + write_output(output_path, "yarn_site", yarn_conf) + + # Get RM Debug log for the last 2 minutes + enable_debug_log = set_rm_scheduler_log_level("DEBUG") + print(enable_debug_log) + log_address = get_node_log_address(RM_ADDRESS, RM_LOG_REGEX) + start_time, end_time = (format_datetime_no_seconds(datetime.now() - timedelta(seconds=120)), + 
format_datetime_no_seconds(datetime.now())) + rm_debug_log = filter_node_log(log_address, start_time, end_time) + write_output(output_path, "rm_debug_log_2min", rm_debug_log) + enable_info_log = set_rm_scheduler_log_level("INFO") + print(enable_info_log) + + return output_path + +####################################################### Utils Functions ############################################### + + +def list_issues(): + print("application_diagnostic:appId", "scheduler_related_issue", sep="\n") + + +def parse_url_from_conf(conf_file, url_property_name): + root = ET.parse(os.path.join(HADOOP_CONF_DIR, conf_file)) + for prop in root.findall("property"): + prop_name = prop.find("name").text + if prop_name == url_property_name: + return prop.find("value").text + + return None + + +def create_output_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + return dir_path + + +def write_output(output_path, out_filename, value): + output_path = create_output_dir(output_path) + with open(os.path.join(output_path, out_filename), 'w') as f: + f.write(value) + + +def run_command(*argv): + try: + cmd = " ".join(arg for arg in argv) + print("Running command with arguments:", cmd) + response = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, check=True) + response_str = response.stdout.decode('utf-8') + except subprocess.CalledProcessError as e: + response_str = "Command failed with error: {}".format(e) + print("Unable to run command: ", response_str) + except Exception as e: + response_str = "Exception occurred: {}".format(e) + print("Exception occurred: ", response_str) + + return response_str + + +def run_cmd_and_save_output(output_path, out_filename, *argv): + file_path = os.path.join(create_output_dir(output_path), out_filename) + with open(file_path, 'w') as f: + return subprocess.Popen(argv, stdout=f) + + +def create_request(url, xml_type=True): + headers = {} + # TODO auth can be handled here + if xml_type: + 
headers["Accept"] = "application/xml" + + try: + req = request.Request(url, headers=headers) + response = request.urlopen(req) + response_str = response.read().decode('utf-8') + except error.HTTPError as e: + response_str = "HTTP error occurred: {} - {}".format(e.code, e.reason) + print("Request failed: ", response_str) + except Exception as e: + response_str = "Unexpected error: {}".format(e) + print("Request failed: {}".format(response_str)) + + return response_str + + +def get_nodemanager_address(app_id): + app_info = create_request("http://{}/ws/v1/cluster/apps/{}".format(RM_ADDRESS, app_id)) + app_info_xml = ET.fromstring(app_info) + return app_info_xml.find("amHostHttpAddress").text + + +def get_node_log_address(node_address, link_regex): + try: + log_page = create_request("http://{}/logs/".format(node_address), False) + matches = re.findall(link_regex, log_page, re.MULTILINE) + if not matches: + return "Warning: No matching log links found at {}/logs/".format(node_address) + return node_address + matches[0] + except Exception as e: + return "Failed to retrieve node logs address from {}: {}".format(node_address, e) + + +def filter_node_log(node_log_address: str, start_time: str, end_time: str): + return run_command("curl", "-s", "http://{}".format(node_log_address), "|", "sed", "-n", + "'/{}/,/{}/p'".format(start_time, end_time)) + + +def get_container_log(log_address, id): + return run_command("curl", "http://{}".format(log_address), "|", "grep", re.sub(r"^(job|application)", "container", id)) + + +def get_application_time(app_info_string): + app_element = ET.fromstring(app_info_string) + start_time_epoch = int(app_element.find("startedTime").text) + finish_time_epoch = int(app_element.find("finishedTime").text) + + start_time_str = datetime.fromtimestamp(start_time_epoch / 1000).strftime(OUTPUT_TIME_FORMAT)[:-4] # -4, the time conversion is not accurrate + finish_time_str = datetime.fromtimestamp(finish_time_epoch / 1000).strftime(OUTPUT_TIME_FORMAT)[:-4] + 
+ return start_time_str, finish_time_str + + +def get_resourcemanager_pid(): + results = run_command("ps", "aux", "|", "grep", "resourcemanager", "|", "grep", "-v", "grep") + + pids = [] + for result in results.strip().splitlines(): + pid = result.split()[1] + pids.append(pid) + + return pids + + +def get_multiple_jstack(pids): + all_jstacks = [] + + for pid in pids: + for i in range(NUMBER_OF_JSTACK): # Get multiple jstack + jstack_output = run_command("jstack", pid) + all_jstacks.append("--- JStack iteration-{} for PID: {} ---\n{}".format(i, pid, jstack_output)) + + return "\n".join(all_jstacks) + + +def set_rm_scheduler_log_level(log_level): + return run_command("yarn", "daemonlog", "-setlevel", RM_ADDRESS, + "org.apache.hadoop.yarn.server.resourcemanager.scheduler", log_level) + + +def format_datetime_no_seconds(datetime_obj): + return datetime_obj.strftime(OUTPUT_TIME_FORMAT_WITHOUT_SECOND) + + +def main(): + + ISSUE_MAP = { + "application_diagnostic": application_diagnostic, + "scheduler_related_issue": scheduler_related_issue, + } + + parser = argparse.ArgumentParser() + parser.add_argument("-l", "--list", help="List the available issue types.", action="store_true") + parser.add_argument("-c", "--command", choices=list(ISSUE_MAP), help="Initiate the diagnostic information collecton" + "for diagnosing the selected issue type.") + parser.add_argument("-a", "--arguments", nargs='*', help="The required arguments for the selected issue type.") + global args + args = parser.parse_args() + + if not (args.list or args.command): + parser.error('No action requested, use --list or --command') + + if args.list: + list_issues() + sys.exit(os.EX_OK) + + global RM_ADDRESS + RM_ADDRESS = parse_url_from_conf(YARN_SITE_XML, RM_ADDRESS_PROPERTY_NAME) + if RM_ADDRESS is None: + print("RM address can't be found, exiting...") + sys.exit(1) + + selected_option = ISSUE_MAP[args.command] + print(selected_option()) # print the resulted output path that will be used by the 
DiagnosticsService.java + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/DefaultRequestInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/DefaultRequestInterceptorREST.java index fd767953d16d0..bdd732d8964de 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/DefaultRequestInterceptorREST.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/DefaultRequestInterceptorREST.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Set; @@ -49,6 +50,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.DelegationToken; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.LabelsToNodesInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo; @@ -137,6 +139,22 @@ public ClusterMetricsInfo getClusterMetricsInfo() { getConf(), client); } + @Override + public CommonIssues getCommonIssueList() { + return RouterWebServiceUtil.genericForward(webAppAddress, null, + CommonIssues.class, HTTPMethods.GET, + RMWSConsts.RM_WEB_SERVICE_PATH + RMWSConsts.COMMON_ISSUE_LIST, + null, null, getConf(), client); + } + + @Override + public Response getCommonIssueData(String issueId, List args) { 
+ return RouterWebServiceUtil.genericForward(webAppAddress, null, + Response.class, HTTPMethods.GET, + RMWSConsts.RM_WEB_SERVICE_PATH + RMWSConsts.COMMON_ISSUE_COLLECT, + null, null, getConf(), client); + } + @Override public SchedulerTypeInfo getSchedulerInfo() { return RouterWebServiceUtil.genericForward(webAppAddress, null, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java index ce7baa3df69ea..a5f381d623e92 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java @@ -102,6 +102,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.DelegationToken; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.LabelsToNodesInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NewApplication; @@ -199,6 +200,7 @@ import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.SIGNAL_TOCONTAINER; import static org.apache.hadoop.yarn.server.router.webapp.RouterWebServiceUtil.extractToken; import static org.apache.hadoop.yarn.server.router.webapp.RouterWebServiceUtil.getKerberosUserGroupInformation; +import 
org.apache.commons.lang3.NotImplementedException; /** * Extends the {@code AbstractRESTRequestInterceptor} class and provides an @@ -1141,6 +1143,16 @@ public AppState getAppState(HttpServletRequest hsr, String appId) return new AppState(); } + @Override + public CommonIssues getCommonIssueList() { + throw new NotImplementedException("Code is not implemented"); + } + + @Override + public Response getCommonIssueData(String issueId, List args) { + throw new NotImplementedException("Code is not implemented"); + } + @Override public ClusterInfo get() { return getClusterInfo(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServices.java index 1266598ff0e27..9323514429618 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServices.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.Set; @@ -67,6 +68,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.DelegationToken; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.LabelsToNodesInfo; import 
org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo; @@ -300,6 +302,30 @@ public ClusterMetricsInfo getClusterMetricsInfo() { return pipeline.getRootInterceptor().getClusterMetricsInfo(); } + @GET + @Path(RMWSConsts.COMMON_ISSUE_LIST) + @Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8, + MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 }) + @Override + public CommonIssues getCommonIssueList() { + init(); + RequestInterceptorChainWrapper pipeline = getInterceptorChain(null); + return pipeline.getRootInterceptor().getCommonIssueList(); + } + + @GET + @Path(RMWSConsts.COMMON_ISSUE_COLLECT) + @Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8, + MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 }) + @Override + public Response getCommonIssueData( + @QueryParam(RMWSConsts.ISSUEID) String issueId, + @QueryParam(RMWSConsts.ISSUEARGS) List args) { + init(); + RequestInterceptorChainWrapper pipeline = getInterceptorChain(null); + return pipeline.getRootInterceptor().getCommonIssueData(issueId, args); + } + @GET @Path(RMWSConsts.SCHEDULER) @Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockRESTRequestInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockRESTRequestInterceptor.java index a09199b9e856b..bbab51b06bfbd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockRESTRequestInterceptor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockRESTRequestInterceptor.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.router.webapp; import java.io.IOException; 
+import java.util.List; import java.util.Set; import javax.servlet.http.HttpServletRequest; @@ -44,6 +45,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.DelegationToken; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.LabelsToNodesInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo; @@ -97,6 +99,16 @@ public ClusterMetricsInfo getClusterMetricsInfo() { return new ClusterMetricsInfo(); } + @Override // TODO these may need to be edited for testing purposes + public CommonIssues getCommonIssueList() { + return new CommonIssues(); + } + + @Override // TODO these may need to be edited for testing purposes + public Response getCommonIssueData(String issueId, List args) { + return Response.ok().build(); + } + @Override public SchedulerTypeInfo getSchedulerInfo() { return new SchedulerTypeInfo(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/PassThroughRESTRequestInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/PassThroughRESTRequestInterceptor.java index 1bffd40db3c19..663f720a35d90 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/PassThroughRESTRequestInterceptor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/PassThroughRESTRequestInterceptor.java @@ -19,6 +19,7 @@ package 
org.apache.hadoop.yarn.server.router.webapp; import java.io.IOException; +import java.util.List; import java.util.Set; import javax.servlet.http.HttpServletRequest; @@ -42,6 +43,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.DelegationToken; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.LabelsToNodesInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo; @@ -122,6 +124,16 @@ public ClusterMetricsInfo getClusterMetricsInfo() { return getNextInterceptor().getClusterMetricsInfo(); } + @Override + public CommonIssues getCommonIssueList() { + return getNextInterceptor().getCommonIssueList(); + } + + @Override + public Response getCommonIssueData(String issueId, List args) { + return getNextInterceptor().getCommonIssueData(issueId, args); + } + @Override public SchedulerTypeInfo getSchedulerInfo() { return getNextInterceptor().getSchedulerInfo();