Skip to content

Commit

Permalink
Deko3D improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
XITRIX committed Nov 7, 2024
1 parent 56c2374 commit 269d70f
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 61 deletions.
103 changes: 55 additions & 48 deletions app/src/streaming/video/deko3d/DKVideoRenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ namespace
{
static constexpr unsigned StaticCmdSize = 0x10000;

// CPU-side contents of the transform uniform buffer. checkAndInitialize()
// allocates the buffer with sizeof(Transformation) and fills these fields
// through the buffer's CPU address.
// NOTE(review): the fragment shader declares its `Transformation` block as
// std140. std140 pads mat3 columns and vec3 members to 16 bytes, whereas
// glm::mat3 / glm::vec3 are presumably tightly packed here -- confirm that
// this struct's memory layout actually matches the shader-side block.
struct Transformation
{
// YUV -> RGB conversion matrix, set from gl_color_matrix(colorspace, full-range flag).
glm::mat3 yuvmat;
// Per-channel offset, set from gl_color_offset(full-range flag); the shader
// code that uses it subtracts it from the sampled YCbCr before applying yuvmat.
glm::vec3 offset;
// Texture-coordinate adjustment derived from the frame/screen aspect ratios:
// xy is subtracted from the coordinate and the result is multiplied by zw.
glm::vec4 uv_data;
};

struct Vertex
{
float position[3];
Expand Down Expand Up @@ -98,7 +105,7 @@ void DKVideoRenderer::checkAndInitialize(int width, int height, AVFrame* frame)
m_screen_width = width;
m_screen_height = height;

auto *vctx = (brls::SwitchVideoContext *)brls::Application::getPlatform()->getVideoContext();
vctx = (brls::SwitchVideoContext *)brls::Application::getPlatform()->getVideoContext();
this->dev = vctx->getDeko3dDevice();
this->queue = vctx->getQueue();

Expand All @@ -123,6 +130,35 @@ void DKVideoRenderer::checkAndInitialize(int width, int height, AVFrame* frame)
vertexBuffer = pool_data->allocate(sizeof(QuadVertexData), alignof(Vertex));
memcpy(vertexBuffer.getCpuAddr(), QuadVertexData.data(), vertexBuffer.getSize());


// Load the transform buffer
transformUniformBuffer = pool_data->allocate(sizeof(Transformation), DK_UNIFORM_BUF_ALIGNMENT);
auto transformState = reinterpret_cast<Transformation *>(transformUniformBuffer.getCpuAddr());

bool colorFull = frame->color_range == AVCOL_RANGE_JPEG;

transformState->offset = gl_color_offset(colorFull);
transformState->yuvmat = gl_color_matrix(frame->colorspace, colorFull);

float frameAspect = ((float)m_frame_height / (float)m_frame_width);
float screenAspect = ((float)m_screen_height / (float)m_screen_width);

if (frameAspect > screenAspect) {
float multiplier = frameAspect / screenAspect;
transformState->uv_data = { 0.5f - 0.5f * (1.0f / multiplier),
0.0f, multiplier, 1.0f };
} else {
float multiplier = screenAspect / frameAspect;
transformState->uv_data = { 0.0f,
0.5f - 0.5f * (1.0f / multiplier), 1.0f, multiplier };
}


// cmdbuf.pushConstants(
// transformUniformBuffer.getGpuAddr(), transformUniformBuffer.getSize(),
// 0, sizeof(transformState), &transformState);


// Allocate image indexes for planes
lumaTextureId = vctx->allocateImageIndex();
chromaTextureId = vctx->allocateImageIndex();
Expand Down Expand Up @@ -161,48 +197,10 @@ void DKVideoRenderer::checkAndInitialize(int width, int height, AVFrame* frame)
imageDescriptorSet->update(cmdbuf, lumaTextureId, lumaDesc);
imageDescriptorSet->update(cmdbuf, chromaTextureId, chromaDesc);

// Load the transform buffer
transformUniformBuffer = pool_data->allocate(sizeof(transformState), DK_UNIFORM_BUF_ALIGNMENT);

bool colorFull = frame->color_range == AVCOL_RANGE_JPEG;

transformState.offset = gl_color_offset(colorFull);
transformState.yuvmat = gl_color_matrix(frame->colorspace, colorFull);

float frameAspect = ((float)m_frame_height / (float)m_frame_width);
float screenAspect = ((float)m_screen_height / (float)m_screen_width);

if (frameAspect > screenAspect) {
float multiplier = frameAspect / screenAspect;
transformState.uv_data = { 0.5f - 0.5f * (1.0f / multiplier),
0.0f, multiplier, 1.0f };
} else {
float multiplier = screenAspect / frameAspect;
transformState.uv_data = { 0.0f,
0.5f - 0.5f * (1.0f / multiplier), 1.0f, multiplier };
}

cmdbuf.pushConstants(
transformUniformBuffer.getGpuAddr(), transformUniformBuffer.getSize(),
0, sizeof(transformState), &transformState);

queue.submitCommands(cmdbuf.finishList());
queue.waitIdle();

m_is_initialized = true;
}

int frames = 0;
uint64_t timeCount = 0;

void DKVideoRenderer::draw(NVGcontext* vg, int width, int height, AVFrame* frame, int imageFormat) {
checkAndInitialize(width, height, frame);

uint64_t before_render = LiGetMillis();

if (!m_video_render_stats.rendered_frames) {
m_video_render_stats.measurement_start_timestamp = before_render;
}

dk::RasterizerState rasterizerState;
dk::ColorState colorState;
Expand All @@ -212,33 +210,42 @@ void DKVideoRenderer::draw(NVGcontext* vg, int width, int height, AVFrame* frame
cmdbuf.clear();
cmdbuf.clearColor(0, DkColorMask_RGBA, 0.0f, 0.0f, 0.0f, 0.0f);

// brls::Logger::debug("TIME LOG 1: {}", float(LiGetMillis() - before_render));

// Bind state required for drawing the quad
cmdbuf.bindShaders(DkStageFlag_GraphicsMask, { vertexShader, fragmentShader });
cmdbuf.bindTextures(DkStage_Fragment, 0, dkMakeTextureHandle(lumaTextureId, 0));
cmdbuf.bindTextures(DkStage_Fragment, 1, dkMakeTextureHandle(chromaTextureId, 0));
cmdbuf.bindUniformBuffer(DkStage_Vertex, 0, transformUniformBuffer.getGpuAddr(), transformUniformBuffer.getSize());
cmdbuf.bindUniformBuffer(DkStage_Fragment, 2, transformUniformBuffer.getGpuAddr(), transformUniformBuffer.getSize());
cmdbuf.bindRasterizerState(rasterizerState);
cmdbuf.bindColorState(colorState);
cmdbuf.bindColorWriteState(colorWriteState);
cmdbuf.bindVtxBuffer(0, vertexBuffer.getGpuAddr(), vertexBuffer.getSize());
cmdbuf.bindVtxAttribState(VertexAttribState);
cmdbuf.bindVtxBufferState(VertexBufferState);

// brls::Logger::debug("TIME LOG 2: {}", float(LiGetMillis() - before_render));

// Draw the quad
cmdbuf.draw(DkPrimitive_Quads, QuadVertexData.size(), 1, 0, 0);
cmdlist = cmdbuf.finishList();

// brls::Logger::debug("TIME LOG 3: {}", float(LiGetMillis() - before_render));
m_is_initialized = true;
}

int frames = 0;
uint64_t timeCount = 0;

void DKVideoRenderer::draw(NVGcontext* vg, int width, int height, AVFrame* frame, int imageFormat) {
checkAndInitialize(width, height, frame);

uint64_t before_render = LiGetMillis();

if (!m_video_render_stats.rendered_frames) {
m_video_render_stats.measurement_start_timestamp = before_render;
}

// Finish off this command list
queue.submitCommands(cmdbuf.finishList());
queue = vctx->getQueue();
queue.submitCommands(cmdlist);
queue.waitIdle();

// brls::Logger::debug("TIME LOG 4: {}", float(LiGetMillis() - before_render));

frames++;
timeCount += LiGetMillis() - before_render;

Expand Down
14 changes: 4 additions & 10 deletions app/src/streaming/video/deko3d/DKVideoRenderer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,12 @@
#include <glm/mat4x4.hpp>

#include <borealis.hpp>
#include <borealis/platforms/switch/switch_video.hpp>
#include <nanovg/framework/CShader.h>
#include <nanovg/framework/CExternalImage.h>
#include <nanovg/framework/CDescriptorSet.h>
#include <optional>

struct Transformation
{
glm::mat3 yuvmat;
glm::vec3 offset;
glm::vec4 uv_data;
};

class DKVideoRenderer : public IVideoRenderer {
public:
DKVideoRenderer();
Expand All @@ -38,9 +32,7 @@ class DKVideoRenderer : public IVideoRenderer {
int m_screen_width = 0;
int m_screen_height = 0;

Transformation transformState;
CMemPool::Handle transformUniformBuffer;

brls::SwitchVideoContext *vctx = nullptr;
dk::Device dev;
dk::Queue queue;

Expand All @@ -49,6 +41,7 @@ class DKVideoRenderer : public IVideoRenderer {
std::optional<CMemPool> pool_data;

dk::UniqueCmdBuf cmdbuf;
DkCmdList cmdlist;

CDescriptorSet<4096U> *imageDescriptorSet;
// CDescriptorSet<1> samplerDescriptorSet;
Expand All @@ -57,6 +50,7 @@ class DKVideoRenderer : public IVideoRenderer {
CShader fragmentShader;

CMemPool::Handle vertexBuffer;
CMemPool::Handle transformUniformBuffer;

dk::ImageLayout lumaMappingLayout;
dk::ImageLayout chromaMappingLayout;
Expand Down
28 changes: 25 additions & 3 deletions app/src/streaming/video/deko3d/texture_fsh.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,29 @@ layout (std140, binding = 2) uniform Transformation

// Fragment entry point: samples the luma plane (plane0) and the two-channel
// chroma plane (plane1) and writes an opaque RGB colour to outColor.
void main()
{
vec2 uv = (vTextureCoord - u.uv_data.xy) * u.uv_data.zw;
vec3 YCbCr = vec3(texture(plane0, uv).r, texture(plane1, uv).r, texture(plane1, uv).g) - u.offset;
outColor = vec4(clamp(u.yuvmat * YCbCr, 0.0, 1.0), 1.0);
// Does not work (uniform-driven path: uv_data crop/scale + offset + yuvmat):
// vec2 uv = (vTextureCoord - u.uv_data.xy) * u.uv_data.zw;
// vec3 YCbCr = vec3(texture(plane0, uv).r, texture(plane1, uv).r, texture(plane1, uv).g) - u.offset;
// outColor = vec4(clamp(u.yuvmat * YCbCr, 0.0, 1.0), 1.0);

// Almost works (offset + yuvmat applied at the unmodified texture coordinate):
// vec3 YCbCr = vec3(
// texture2D(plane0, vTextureCoord)[0],
// texture2D(plane1, vTextureCoord).xy
// );

// YCbCr -= u.offset;
// outColor = vec4(clamp(u.yuvmat * YCbCr, 0.0, 1.0), 1.0f);

// Fixed-coefficient YUV -> RGB conversion. This is the last write to
// outColor, so it determines the final fragment colour.
// NOTE(review): this path does not read u.yuvmat, u.offset or u.uv_data, so
// the colour matrix/range and the aspect-ratio adjustment computed on the CPU
// side are not applied here, and the result is not clamped -- confirm this is
// intended as a temporary workaround.
float r, g, b, yt, ut, vt;

// Luma from plane0; U and V from the red/green channels of plane1,
// re-centred around zero by subtracting 0.5.
yt = texture2D(plane0, vTextureCoord).r;
ut = texture2D(plane1, vTextureCoord).r - 0.5;
vt = texture2D(plane1, vTextureCoord).g - 0.5;

// Hard-coded conversion coefficients (presumably BT.601-style YUV -> RGB; verify).
r = yt + 1.13983*vt;
g = yt - 0.39465*ut - 0.58060*vt;
b = yt + 2.03211*ut;

outColor = vec4(r, g, b, 1.0);
}

0 comments on commit 269d70f

Please sign in to comment.