Skip to content
This repository was archived by the owner on Mar 12, 2020. It is now read-only.

Commit

Permalink
...
Browse files Browse the repository at this point in the history
  • Loading branch information
deepakkumar1984 committed Feb 11, 2019
1 parent b87e8c3 commit a44ce5d
Show file tree
Hide file tree
Showing 8 changed files with 66 additions and 192 deletions.
10 changes: 8 additions & 2 deletions Examples/MNIST/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ static void Main(string[] args)
{
Global.UseGpu();

Tensor x = Tensor.FromArray(Global.Device, new float[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 });
x = x.Reshape(3, 3);

var result = TOps.Diag(x);
result.Print();

string datasetFolder = @"C:\dataset\MNIST";
bool useDenseModel = false;

Expand Down Expand Up @@ -50,9 +56,9 @@ private static Sequential BuildFCModel()
private static Sequential BuildConvModel()
{
Sequential model = new Sequential();
model.Add(new Conv2D(filters: 16, kernalSize: Tuple.Create<uint, uint>(5, 5), activation: ActType.Sigmoid));
model.Add(new Conv2D(filters: 16, kernalSize: Tuple.Create<uint, uint>(5, 5), activation: ActType.ReLU));
model.Add(new MaxPooling2D(poolSize: Tuple.Create<uint, uint>(2, 2)));
model.Add(new Conv2D(filters: 32, kernalSize: Tuple.Create<uint, uint>(5, 5), activation: ActType.Sigmoid));
model.Add(new Conv2D(filters: 32, kernalSize: Tuple.Create<uint, uint>(5, 5), activation: ActType.ReLU));
model.Add(new MaxPooling2D(poolSize: Tuple.Create<uint, uint>(2, 2)));
//model.Add(new Dropout(0.2f));
model.Add(new Flatten());
Expand Down
2 changes: 1 addition & 1 deletion ManagedCuda/ManagedCuda.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
</PropertyGroup>

<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
<DefineConstants>TRACE;WIN,CUDA90,CUDNN7</DefineConstants>
<DefineConstants>TRACE;WIN,CUDA100,CUDNN7</DefineConstants>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>

Expand Down
2 changes: 1 addition & 1 deletion SiaNet.Test/Im2ColTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public void Im2Col_2d()
[TestMethod]
public void DiagTest()
{
//Global.UseGpu();
Global.UseGpu();
Tensor x = Tensor.FromArray(Global.Device, new float[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 });
x = x.Reshape(3, 3);

Expand Down
9 changes: 9 additions & 0 deletions Tensor/TensorSharp/Cuda/CudaBasicOps.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
using System.Linq;
using System.Text;
using TensorSharp.Core;
using TensorSharp.Cuda.DeviceCode;
using TensorSharp.CUDA.DeviceCode;
using TensorSharp.CUDA.KernelOps;
using TensorSharp.CUDA.MatrixMul;
Expand Down Expand Up @@ -61,6 +62,8 @@ public class CudaBasicOps
/// </summary>
private readonly ReduceDimIndexKernels reduceDimIndexKernels = new ReduceDimIndexKernels();

private readonly MatrixOps matrixOps = new MatrixOps();


/// <summary>
/// Initializes a new instance of the <see cref="CudaBasicOps"/> class.
Expand Down Expand Up @@ -986,5 +989,11 @@ public Tensor StdAll(Tensor result, Tensor src)
return writeTarget;
}


/// <summary>
/// Entry point registered under the op name "diag" for <see cref="CudaStorage"/>;
/// it hands the source tensor straight to the <c>matrixOps</c> helper.
/// </summary>
/// <param name="src">The tensor passed through to <c>matrixOps.Diag</c>.</param>
/// <returns>Whatever <c>matrixOps.Diag</c> returns for <paramref name="src"/>.</returns>
// NOTE(review): MatrixOps.Diag carries the same RegisterOpStorageType("diag", typeof(CudaStorage))
// attribute -- confirm the op registry tolerates both registrations.
[RegisterOpStorageType("diag", typeof(CudaStorage))]
public Tensor Diag(Tensor src) => matrixOps.Diag(src);
}
}
93 changes: 24 additions & 69 deletions Tensor/TensorSharp/Cuda/DeviceCode/CU/MatrixOps.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,69 +2,17 @@
// tensor, dimension 'dim' is skipped. The tensors are assumed to have the same
// size (with the exception of 't2' in dimension 'dim').
// This version uses a static number of dimensions.
template <typename IndexType, int Dims>
struct IndexToScatterGatherOffsets {
static __device__ void compute(
IndexType linearId, const int dim,
const TensorInfo<IndexType>& index, IndexType* indexOffset,
const TensorInfo<IndexType>& t1, IndexType* t1Offset,
const TensorInfo<IndexType>& t2, IndexType* t2Offset) {
for (int d = Dims - 1; d >= 0; d--) {
IndexType curDimIndex = linearId % index.sizes[d];
*indexOffset += curDimIndex * index.strides[d];
*t1Offset += curDimIndex * t1.strides[d];
if (d != dim) {
*t2Offset += curDimIndex * t2.strides[d];
}
linearId /= index.sizes[d];
}
}

static __device__ void compute(
IndexType linearId, const int dim,
const TensorInfo<IndexType>& index, IndexType* indexOffset,
const TensorInfo<IndexType>& t2, IndexType* t2Offset) {
for (int d = Dims - 1; d >= 0; d--) {
IndexType curDimIndex = linearId % index.sizes[d];
*indexOffset += curDimIndex * index.strides[d];
if (d != dim) {
*t2Offset += curDimIndex * t2.strides[d];
}
linearId /= index.sizes[d];
}
}
};

// Same as above but using a dynamic number of dimensions.
template <typename IndexType>
struct IndexToScatterGatherOffsets<IndexType, -1> {
struct DiagOffsets<IndexType, -1> {
static __device__ void compute(
IndexType linearId, const int dim,
const TensorInfo<IndexType>& index, IndexType* indexOffset,
const TensorInfo<IndexType>& t1, IndexType* t1Offset,
const TensorInfo<IndexType>& t2, IndexType* t2Offset) {
for (int d = index.dims - 1; d >= 0; d--) {
IndexType curDimIndex = linearId % index.sizes[d];
*indexOffset += curDimIndex * index.strides[d];
*t1Offset += curDimIndex * t1.strides[d];
if (d != dim) {
*t2Offset += curDimIndex * t2.strides[d];
IndexType linearId, const int dim, const TensorInfo<IndexType>& t, IndexType* tOffset) {
for (int d = t.dims - 1; d >= 0; d--) {
IndexType curDimIndex = linearId % t.sizes[d];
*tOffset += curDimIndex * t.strides[d];
}
linearId /= index.sizes[d];
}
}

static __device__ void compute(
IndexType linearId, const int dim,
const TensorInfo<IndexType>& index, IndexType* indexOffset,
const TensorInfo<IndexType>& t2, IndexType* t2Offset) {
for (int d = index.dims - 1; d >= 0; d--) {
IndexType curDimIndex = linearId % index.sizes[d];
*indexOffset += curDimIndex * index.strides[d];
if (d != dim) {
*t2Offset += curDimIndex * t2.strides[d];
}
linearId /= index.sizes[d];
linearId /= t.sizes[d];
}
}
};
Expand All @@ -75,20 +23,27 @@ __global__ void diag_kernel(
TensorInfo<IndexType> tensor,
TensorInfo<IndexType> src,
const IndexType totalElements) {
for (IndexType linearId = blockIdx.x * blockDim.x + threadIdx.x; linearId < totalElements; linearId += gridDim.x * blockDim.x) {
IndexType tensorOffset = 0;
IndexType srcOffset = 0;
IndexType indexOffset = 0;
for (IndexType i = blockIdx.x * blockDim.x + threadIdx.x; i < totalElements; i += gridDim.x * blockDim.x) {
for (IndexType j = blockIdx.x * blockDim.x + threadIdx.x; j < totalElements; j += gridDim.x * blockDim.x) {
IndexType tensorOffset = 0;
IndexType srcOffset = 0;

IndexToScatterGatherOffsets<IndexType, Dims>::compute(linearId, dim,
index, &indexOffset,
tensor, &tensorOffset,
src, &srcOffset);
DiagOffsets<IndexType>::compute(i, dim, tensor, &tensorOffset);
DiagOffsets<IndexType>::compute(i, dim, src, &srcOffset);

IndexType indexValue = (IndexType)index.data[indexOffset];
srcOffset += indexValue * src.strides[dim];
if (i == j)
{
IndexType indexValue = (IndexType)src.data[tensorOffset];
srcOffset += indexValue * src.strides[dim];

tensor.data[tensorOffset] = src.data[srcOffset];
tensor.data[tensorOffset] = src.data[srcOffset];
}
else
{
tensor.data[tensorOffset] = 0;
}

}
}
};

Expand Down
113 changes: 1 addition & 112 deletions Tensor/TensorSharp/Cuda/DeviceCode/MatrixKernels.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,116 +32,6 @@ namespace TensorSharp.CUDA.DeviceCode
[Precompile]
public class MatrixKernels : CudaCode
{
/// <summary>
/// The code
/// </summary>
public static string Code = @"
// Compute the offsets into the given tensors for a linear index. For the 't2'
// tensor, dimension 'dim' is skipped. The tensors are assumed to have the same
// size (with the exception of 't2' in dimension 'dim').
// This version uses a static number of dimensions.
template <typename IndexType, int Dims>
struct IndexToScatterGatherOffsets {
static __device__ void compute(
IndexType linearId, const int dim,
const TensorInfo<IndexType>& index, IndexType* indexOffset,
const TensorInfo<IndexType>& t1, IndexType* t1Offset,
const TensorInfo<IndexType>& t2, IndexType* t2Offset) {
for (int d = Dims - 1; d >= 0; d--) {
IndexType curDimIndex = linearId % index.sizes[d];
*indexOffset += curDimIndex * index.strides[d];
*t1Offset += curDimIndex * t1.strides[d];
if (d != dim) {
*t2Offset += curDimIndex * t2.strides[d];
}
linearId /= index.sizes[d];
}
}
static __device__ void compute(
IndexType linearId, const int dim,
const TensorInfo<IndexType>& index, IndexType* indexOffset,
const TensorInfo<IndexType>& t2, IndexType* t2Offset) {
for (int d = Dims - 1; d >= 0; d--) {
IndexType curDimIndex = linearId % index.sizes[d];
*indexOffset += curDimIndex * index.strides[d];
if (d != dim) {
*t2Offset += curDimIndex * t2.strides[d];
}
linearId /= index.sizes[d];
}
}
};
// Same as above but using a dynamic number of dimensions.
template <typename IndexType>
struct IndexToScatterGatherOffsets<IndexType, -1> {
static __device__ void compute(
IndexType linearId, const int dim,
const TensorInfo<IndexType>& index, IndexType* indexOffset,
const TensorInfo<IndexType>& t1, IndexType* t1Offset,
const TensorInfo<IndexType>& t2, IndexType* t2Offset) {
for (int d = index.dims - 1; d >= 0; d--) {
IndexType curDimIndex = linearId % index.sizes[d];
*indexOffset += curDimIndex * index.strides[d];
*t1Offset += curDimIndex * t1.strides[d];
if (d != dim) {
*t2Offset += curDimIndex * t2.strides[d];
}
linearId /= index.sizes[d];
}
}
static __device__ void compute(
IndexType linearId, const int dim,
const TensorInfo<IndexType>& index, IndexType* indexOffset,
const TensorInfo<IndexType>& t2, IndexType* t2Offset) {
for (int d = index.dims - 1; d >= 0; d--) {
IndexType curDimIndex = linearId % index.sizes[d];
*indexOffset += curDimIndex * index.strides[d];
if (d != dim) {
*t2Offset += curDimIndex * t2.strides[d];
}
linearId /= index.sizes[d];
}
}
};
template <typename IndexType, int Dims>
__global__ void diag_kernel(
TensorInfo<IndexType> tensor,
TensorInfo<IndexType> src,
const IndexType totalElements) {
for (IndexType linearId = blockIdx.x * blockDim.x + threadIdx.x; linearId < totalElements; linearId += gridDim.x * blockDim.x) {
IndexType tensorOffset = 0;
IndexType srcOffset = 0;
IndexType indexOffset = 0;
IndexToScatterGatherOffsets<IndexType, Dims>::compute(linearId, dim,
index, &indexOffset,
tensor, &tensorOffset,
src, &srcOffset);
IndexType indexValue = (IndexType)index.data[indexOffset];
srcOffset += indexValue * src.strides[dim];
tensor.data[tensorOffset] = src.data[srcOffset];
}
};
#define DECLARE_DIAG(KERNEL_NAME, INDEX_TYPE, DIMS) \
extern ""C"" {\
__global__ void KERNEL_NAME(\
TensorInfo<INDEX_TYPE> tensor,\
TensorInfo<INDEX_TYPE> src,\
INDEX_TYPE totalElements)\
{\
diag_kernel<INDEX_TYPE, DIMS>(tensor, src, totalElements);\
}\
}
";

/// <summary>
/// The diag matrix base name
/// </summary>
Expand All @@ -161,8 +51,7 @@ public MatrixKernels() : base(GetCode(), "General", "ReduceApplyUtils")
/// <returns>System.String.</returns>
private static string GetCode()
{
Code = Resources.MatrixOps;
var sb = new StringBuilder(Code);
var sb = new StringBuilder(Resources.MatrixOps);

sb.AppendLine(GetMacroInvocations(true, 1));
sb.AppendLine(GetMacroInvocations(true, 2));
Expand Down
22 changes: 22 additions & 0 deletions Tensor/TensorSharp/Cuda/KernelOps/MatrixOps.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
using System;
using System.Collections.Generic;
using System.Text;
using TensorSharp.CUDA;
using TensorSharp.CUDA.DeviceCode;

namespace TensorSharp.Cuda.KernelOps
{
    /// <summary>
    /// Ops class that exposes matrix operations by delegating to <see cref="MatrixKernels"/>.
    /// </summary>
    [OpsClass]
    public class MatrixOps
    {
        /// <summary>
        /// The kernel wrapper every operation on this class forwards to.
        /// </summary>
        private readonly MatrixKernels kernels = new MatrixKernels();

        /// <summary>
        /// Initializes a new instance of the <see cref="MatrixOps"/> class.
        /// </summary>
        public MatrixOps() { }

        /// <summary>
        /// Entry point registered under the op name "diag" for <see cref="CudaStorage"/>.
        /// </summary>
        /// <param name="src">The tensor passed through to the kernel wrapper's <c>Diag</c>.</param>
        /// <returns>The tensor returned by the kernel wrapper's <c>Diag</c> for <paramref name="src"/>.</returns>
        [RegisterOpStorageType("diag", typeof(CudaStorage))]
        public Tensor Diag(Tensor src) => kernels.Diag(src);
    }
}
7 changes: 0 additions & 7 deletions Tensor/TensorSharp/TensorSharp.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,6 @@
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>

<ItemGroup>
<Compile Remove="Cuda\DeviceCode\MatrixKernels.cs" />
</ItemGroup>

<ItemGroup>
<PackageReference Include="System.Drawing.Common" Version="4.5.1" />
<PackageReference Include="System.Drawing.Primitives" Version="4.3.0" />
Expand All @@ -62,9 +58,6 @@
</ItemGroup>

<ItemGroup>
<None Update="Cuda\DeviceCode\CU\GatherSelect.cu">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Cuda\DeviceCode\CU\MatrixOps.c">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
Expand Down

0 comments on commit a44ce5d

Please sign in to comment.