Skip to content

Commit

Permalink
Merge ROCm 1.5.1 changes into roc-1.5.x
Browse files Browse the repository at this point in the history
  • Loading branch information
jedwards-AMD committed Jun 3, 2017
2 parents 757f29e + 8ca2ad0 commit 28b1c83
Show file tree
Hide file tree
Showing 52 changed files with 15,012 additions and 556 deletions.
4,479 changes: 4,479 additions & 0 deletions arch/arm64/configs/rock-dbg_defconfig

Large diffs are not rendered by default.

7,821 changes: 7,821 additions & 0 deletions arch/powerpc/configs/rock-dbg_defconfig

Large diffs are not rendered by default.

81 changes: 79 additions & 2 deletions arch/powerpc/platforms/powernv/pci-ioda.c
Original file line number Diff line number Diff line change
Expand Up @@ -1717,6 +1717,62 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
*/
}

/*
 * pnv_pci_ioda_dma_sketchy_bypass - build a 1:1 TCE table behind a 4GB offset
 * @pe: the PE whose DMA window 0 is reconfigured
 *
 * Allocates a single-level TCE table using 256MB TCEs, fills it so that bus
 * address (addr + 4GB) translates to physical address addr for every addr
 * below memory_hotplug_max(), and asks OPAL to install it as DMA window 0 of
 * @pe. Entries below the 4GB offset are left zeroed.
 *
 * The function returns nothing: allocation failure and OPAL failure are only
 * reported through pe_err().
 *
 * NOTE(review): because there is no return value, a caller cannot tell that
 * the bypass was not installed; consider returning an error code.
 * NOTE(review): the table pages are not released when OPAL rejects the
 * mapping; confirm whether freeing them at that point is safe.
 */
static void pnv_pci_ioda_dma_sketchy_bypass(struct pnv_ioda_pe *pe)
{
	/* Enable a transparent bypass into TVE #1 through DMA window 0 */
	s64 rc;
	u64 addr;
	u64 tce_count;
	u64 table_size;
	u64 window_size;
	u64 tce_order = 28; /* 256MB TCEs */
	/* Loop-invariant: read the memory ceiling once. */
	u64 max_mem = memory_hotplug_max();
	struct page *table_pages;
	__be64 *tces;

	/* The window must cover all of memory plus the 4GB offset. */
	window_size = roundup_pow_of_two(max_mem + (1ULL << 32));
	tce_count = window_size >> tce_order;
	table_size = tce_count << 3; /* 8 bytes per TCE */

	pr_debug("ruscur: table_size %016llx PAGE_SIZE %016lx\n",
		 table_size, PAGE_SIZE);
	if (table_size < PAGE_SIZE) {
		pr_debug("ruscur: set table_size to PAGE_SIZE\n");
		table_size = PAGE_SIZE;
	}

	pr_debug("ruscur: tce_count %016llx table_size %016llx\n",
		 tce_count, table_size);

	table_pages = alloc_pages_node(pe->phb->hose->node, GFP_KERNEL,
				       get_order(table_size));

	pr_debug("ruscur: got table_pages %p\n", table_pages);
	if (!table_pages) {
		/* Without a table there is nothing to hand to OPAL. */
		pe_err(pe, "Failed to allocate TCE table for sketchy bypass\n");
		return;
	}

	tces = page_address(table_pages);
	pr_debug("ruscur: got tces %p\n", tces);
	memset(tces, 0, table_size);

	/* One TCE per 256MB of memory, placed 4GB up in the table. */
	for (addr = 0; addr < max_mem; addr += (1ULL << tce_order)) {
		pr_debug("ruscur: addr %016llx index %016llx\n", addr,
			 (addr + (1ULL << 32)) >> tce_order);
		tces[(addr + (1ULL << 32)) >> tce_order] =
			cpu_to_be64(addr | TCE_PCI_READ | TCE_PCI_WRITE);
	}

	rc = opal_pci_map_pe_dma_window(pe->phb->opal_id,
					pe->pe_number,
					/* reconfigure window 0 */
					(pe->pe_number << 1) + 0,
					1, /* level (unsure what this means) */
					__pa(tces),
					table_size,
					1ULL << tce_order);
	if (rc)
		pe_err(pe, "OPAL error %llx in sketchy bypass\n", rc);
	else
		pe_info(pe, "ruscur's sketchy bypass worked, apparently\n");
}

static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
Expand All @@ -1739,8 +1795,29 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
set_dma_ops(&pdev->dev, &dma_direct_ops);
} else {
dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
set_dma_ops(&pdev->dev, &dma_iommu_ops);
/* Find out if we want to address more than 2G */
dev_info(&pdev->dev, "My dma_mask is %016llx\n", dma_mask);
if (dma_mask >> 32 /*&& pe->device_count == 1*/) {
/*
* TODO
* This mode shouldn't be used if the PE has any other
* device on it. Things will go wrong.
* We can't just check for device_count of 1 though,
* because of things like GPUs with audio devices and
* stuff like that. So we should walk the PE and check
* if everything else on it has the same vendor ID...?
*/
dev_info(&pdev->dev, "%d devices on my PE\n",
pe->device_count);
/* Set up the bypass mode */
pnv_pci_ioda_dma_sketchy_bypass(pe);
/* 4GB offset places us into TVE#1 */
set_dma_offset(&pdev->dev, (1ULL << 32));
set_dma_ops(&pdev->dev, &dma_direct_ops);
} else {
dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
set_dma_ops(&pdev->dev, &dma_iommu_ops);
}
}
*pdev->dev.dma_mask = dma_mask;

Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <linux/list.h>
#include <drm/drmP.h>
#include <linux/dma-buf.h>
#include <linux/pagemap.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu_ucode.h"
#include "gca/gfx_8_0_sh_mask.h"
Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,9 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
{0x1002, 0x6862, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
{0x1002, 0x6863, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
{0x1002, 0x6864, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
{0x1002, 0x6867, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
{0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
{0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
{0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
{0, 0, 0}
Expand Down
12 changes: 2 additions & 10 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
Original file line number Diff line number Diff line change
Expand Up @@ -867,8 +867,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,

pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);

/* never 0 (full-speed), fuse or smc-controlled always */
return sprintf(buf, "%i\n", pwm_mode == FDO_PWM_MODE_STATIC ? 1 : 2);
return sprintf(buf, "%i\n", pwm_mode);
}

static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
Expand All @@ -887,14 +886,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
if (err)
return err;

switch (value) {
case 1: /* manual, percent-based */
amdgpu_dpm_set_fan_control_mode(adev, FDO_PWM_MODE_STATIC);
break;
default: /* disable */
amdgpu_dpm_set_fan_control_mode(adev, 0);
break;
}
amdgpu_dpm_set_fan_control_mode(adev, value);

return count;
}
Expand Down
7 changes: 5 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -4846,8 +4846,11 @@ static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
/* enable the doorbell if requested */
if (ring->use_doorbell) {
if ((adev->asic_type == CHIP_CARRIZO) ||
(adev->asic_type == CHIP_FIJI) ||
(adev->asic_type == CHIP_STONEY)) {
(adev->asic_type == CHIP_FIJI) ||
(adev->asic_type == CHIP_STONEY) ||
(adev->asic_type == CHIP_POLARIS10) ||
(adev->asic_type == CHIP_POLARIS11) ||
(adev->asic_type == CHIP_POLARIS12)) {
WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
AMDGPU_DOORBELL_KIQ << 2);
WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
Expand Down
3 changes: 2 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
* in visible VRAM and the address space. Use at most
* half of each. */
uint64_t max_gtt_size = min(
adev->mc.visible_vram_size / 8 * PAGE_SIZE / 2,
adev->mc.visible_vram_size / 8 *
AMDGPU_GPU_PAGE_SIZE / 2,
1ULL << 39);

si_meminfo(&si);
Expand Down
3 changes: 2 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
* in visible VRAM and the address space. Use at most
* half of each. */
uint64_t max_gtt_size = min(
adev->mc.visible_vram_size / 8 * PAGE_SIZE / 2,
adev->mc.visible_vram_size / 8 *
AMDGPU_GPU_PAGE_SIZE / 2,
1ULL << 39);

si_meminfo(&si);
Expand Down
3 changes: 2 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
* half of each.
*/
uint64_t max_gtt_size = min(
adev->mc.visible_vram_size / 8 * PAGE_SIZE / 2,
adev->mc.visible_vram_size / 8 *
AMDGPU_GPU_PAGE_SIZE / 2,
1ULL << 39);

si_meminfo(&si);
Expand Down
7 changes: 7 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,13 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)

if (adev->mman.buffer_funcs_ring == ring)
amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size);

/* FIXME: temporarily disable SDMA-ULV interrupts for Vega10.
* Remove this once the fix is in firmware.
*/
if (ring->adev->asic_type == CHIP_VEGA10)
WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_ULV_CNTL),
0);
}

return 0;
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdkfd/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

config HSA_AMD
tristate "HSA kernel driver for AMD GPU devices"
depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64
depends on (DRM_RADEON || DRM_AMDGPU) && (X86_64 || PPC64 || ARM64)
select DRM_AMDGPU_USERPTR
help
Enable this if you want to use HSA features on AMD GPU devices.
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
Original file line number Diff line number Diff line change
Expand Up @@ -2072,6 +2072,7 @@ static int kfd_ioctl_cross_memory_copy(struct file *filep,
space_left -= copied;
dst_va_addr += copied;
dst_offset += copied;
src_offset += copied;
if (dst_va_addr > dst_bo->it.last + 1) {
pr_err("Cross mem copy failed. Memory overflow\n");
err = -EFAULT;
Expand Down
22 changes: 19 additions & 3 deletions drivers/gpu/drm/amd/amdkfd/kfd_crat.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#include <linux/kernel.h>
#include <linux/acpi.h>
#include <linux/mm.h>
#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
#include <linux/amd-iommu.h>
#endif
#include <linux/pci.h>
#include "kfd_crat.h"
#include "kfd_priv.h"
Expand Down Expand Up @@ -664,6 +666,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
*
* Return 0 if successful else return -ve value
*/
#ifdef CONFIG_ACPI
int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
{
struct acpi_table_header *crat_table;
Expand Down Expand Up @@ -706,6 +709,7 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)

return 0;
}
#endif

/* Memory required to create Virtual CRAT.
* Since there is no easy way to predict the amount of memory required, the
Expand Down Expand Up @@ -808,12 +812,14 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
{
struct crat_header *crat_table = (struct crat_header *)pcrat_image;
struct acpi_table_header *acpi_table;
acpi_status status;
struct crat_subtype_generic *sub_type_hdr;
int avail_size = *size;
int numa_node_id;
int ret = 0;
#ifdef CONFIG_ACPI
struct acpi_table_header *acpi_table;
acpi_status status;
#endif

if (pcrat_image == NULL || avail_size < VCRAT_SIZE_FOR_CPU)
return -EINVAL;
Expand All @@ -829,6 +835,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
memcpy(&crat_table->signature, CRAT_SIGNATURE, sizeof(crat_table->signature));
crat_table->length = sizeof(struct crat_header);

#ifdef CONFIG_ACPI
status = acpi_get_table("DSDT", 0, &acpi_table);
if (status == AE_NOT_FOUND)
pr_warn("DSDT table not found for OEM information\n");
Expand All @@ -837,6 +844,11 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
memcpy(crat_table->oem_id, acpi_table->oem_id, CRAT_OEMID_LENGTH);
memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, CRAT_OEMTABLEID_LENGTH);
}
#else
crat_table->oem_revision = 0;
memcpy(crat_table->oem_id, "INV", CRAT_OEMID_LENGTH);
memcpy(crat_table->oem_table_id, "UNAVAIL", CRAT_OEMTABLEID_LENGTH);
#endif
crat_table->total_entries = 0;
crat_table->num_domains = 0;

Expand Down Expand Up @@ -969,15 +981,17 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
struct crat_subtype_generic *sub_type_hdr;
struct crat_subtype_computeunit *cu;
struct kfd_cu_info cu_info;
struct amd_iommu_device_info iommu_info;
int avail_size = *size;
uint32_t total_num_of_cu;
int num_of_cache_entries = 0;
int cache_mem_filled = 0;
int ret = 0;
#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
struct amd_iommu_device_info iommu_info;
const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
#endif
struct kfd_local_mem_info local_mem_info;

if (pcrat_image == NULL || avail_size < VCRAT_SIZE_FOR_GPU)
Expand Down Expand Up @@ -1035,11 +1049,13 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,

/* Check if this node supports IOMMU. During parsing this flag will
* translate to HSA_CAP_ATS_PRESENT */
#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
iommu_info.flags = 0;
if (0 == amd_iommu_device_info(kdev->pdev, &iommu_info)) {
if ((iommu_info.flags & required_iommu_flags) == required_iommu_flags)
cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
}
#endif

crat_table->length += sub_type_hdr->length;
crat_table->total_entries++;
Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/amd/amdkfd/kfd_crat.h
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,9 @@ struct cdit_header {

#pragma pack()

#ifdef CONFIG_ACPI
int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
#endif
void kfd_destroy_crat_image(void *crat_image);
int kfd_parse_crat_table(void *crat_image,
struct list_head *device_list,
Expand Down
Loading

0 comments on commit 28b1c83

Please sign in to comment.