Skip to content

Commit

Permalink
Play video at expected frame rate
Browse files Browse the repository at this point in the history
  • Loading branch information
mplucinski committed Jun 3, 2018
1 parent e194fd9 commit b484eae
Showing 1 changed file with 141 additions and 73 deletions.
214 changes: 141 additions & 73 deletions src/media/Video.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <utils/logger.h>
#include <utils/Utils.h>
#include <fstream>
#include <queue>
#include "Video.h"
#include "ui/ImageView.h"

Expand Down Expand Up @@ -47,9 +48,20 @@ struct deleteAVFrame
}
};

// Deleter for reference-counted AVFrames (decoder opened with
// refcounted_frames=1): the buffer reference is dropped first, then the
// frame itself is released through the plain deleteAVFrame functor.
struct deleteAVFrameRef
{
    void operator()(AVFrame *frame)
    {
        if (frame != nullptr)
            av_frame_unref(frame);

        deleteAVFrame{}(frame);
    }
};

// RAII aliases for FFmpeg objects; each deleter releases the matching
// libav* resource when the unique_ptr goes out of scope.
using AVCodecContextPtr = std::unique_ptr<AVCodecContext, deleteAVCodecContext>;
using AVFormatContextPtr = std::unique_ptr<AVFormatContext, deleteAVFormatContext>;
using AVFramePtr = std::unique_ptr<AVFrame, deleteAVFrame>;
// Frame that additionally owns a buffer reference; unreferenced before free.
using AVFrameRefPtr = std::unique_ptr<AVFrame, deleteAVFrameRef>;

}

Expand All @@ -69,14 +81,13 @@ namespace Media {
engine.getRootUIView().addChild(view);
}

bool init(const uint16_t width, const uint16_t height)
~Video()
{
frame.reset(av_frame_alloc());
if (!frame) {
LogWarn() << "Could not allocate frame";
return false;
}
view->setHidden(true);
}

bool init(const uint16_t width, const uint16_t height)
{
av_init_packet(&packet);
packet.data = nullptr;
packet.size = 0;
Expand All @@ -102,24 +113,33 @@ namespace Media {
return false;
}

if (!openCodecContext(videoStreamIndex, videoCodecContext, AVMEDIA_TYPE_VIDEO)) {
if (!openCodecContext(videoStreamIndex, videoCodecContext, videoTimeBase, AVMEDIA_TYPE_VIDEO)) {
LogWarn() << "Could not open video context";
return false;
}

if (!openCodecContext(audioStreamIndex, audioCodecContext, AVMEDIA_TYPE_AUDIO)) {
if (!openCodecContext(audioStreamIndex, audioCodecContext, audioTimeBase, AVMEDIA_TYPE_AUDIO)) {
LogWarn() << "Could not open audio context";
return false;
}

av_dump_format(formatContext.get(), 0, fileFullPath.c_str(), 0); // TODO: dump to logger

if (videoCodecContext->pix_fmt != AV_PIX_FMT_YUV420P) {
LogWarn() << "Unknown pixel format";
return false;
}

// load first frame
if (nextVideoFrame() != Outcome::FrameVideo)
return false;

initialized_ = true;
return true;
}

private:
bool openCodecContext(int &streamIndex, AVCodecContextPtr &codecContext, AVMediaType mediaType)
bool openCodecContext(int &streamIndex, AVCodecContextPtr &codecContext, double &timeBase, AVMediaType mediaType)
{
int index = av_find_best_stream(formatContext.get(), mediaType, -1, -1, nullptr, 0);
if (index < 0) {
Expand Down Expand Up @@ -147,104 +167,150 @@ namespace Media {
}

AVDictionary *opts = nullptr;
av_dict_set(&opts, "refcounted_frames", "0", 0);
av_dict_set(&opts, "refcounted_frames", "1", 0);
if (avcodec_open2(context.get(), codec, &opts) < 0) {
LogWarn() << "Could not open codec for stream" << av_get_media_type_string(mediaType);
return false;
}

streamIndex = index;
std::swap(codecContext, context);
timeBase = av_q2d(stream->time_base);
return true;
}

public:
void nextFrame()
/// What nextFrame() produced from the container stream.
enum class Outcome {
    End,        // no more packets could be read from the container
    Error,      // demux or decode failure
    FrameVideo, // packet belonged to the video stream
    FrameAudio  // packet belonged to the audio stream
};

/// Demux packets until a video frame becomes available.
/// Audio frames encountered along the way are consumed and skipped;
/// End and Error terminate the search and are passed to the caller.
Outcome nextVideoFrame()
{
    for (;;) {
        const Outcome outcome = nextFrame();

        // Only an audio frame keeps the search going; End, Error and
        // FrameVideo all end it.
        if (outcome != Outcome::FrameAudio)
            return outcome;
    }
}

/// Read the next packet from the container and decode it.
/// Decoded video frames are appended to videoFrames; audio packets are
/// currently only acknowledged, not decoded.
/// @return End when the container is exhausted, Error on demux/decode
///         failure, otherwise which stream the packet belonged to.
Outcome nextFrame()
{
    int r = av_read_frame(formatContext.get(), &packet);
    if (r < 0) { // no more frames, or a read error
        LogInfo() << "No more frames or cannot read them";
        return Outcome::End;
    }

    if (packet.stream_index == videoStreamIndex) {
        r = avcodec_send_packet(videoCodecContext.get(), &packet);
        // av_read_frame() hands us a reference; release it or it leaks.
        av_packet_unref(&packet);
        if (r < 0 || r == AVERROR(EAGAIN) || r == AVERROR_EOF) {
            LogWarn() << "avcodec_send_packet: " << r << std::endl;
            return Outcome::Error;
        }

        // Drain every frame the decoder can deliver for this packet.
        while (r >= 0) {
            AVFrameRefPtr frame{av_frame_alloc()};
            r = avcodec_receive_frame(videoCodecContext.get(), frame.get());
            if (r == AVERROR(EAGAIN) || r == AVERROR_EOF) {
                std::cout << "avcodec_receive_frame: " << r << std::endl;
                break;
            }
            std::cout << "video frame: " << videoCodecContext->frame_number << ", pts: " << frame->pts << std::endl;

            videoFrames.push(std::move(frame));
        }
        // NOTE(review): this reports FrameVideo even when the decoder
        // produced no frame yet (EAGAIN on the first receive); callers
        // that rely on a queued frame should verify videoFrames is
        // non-empty -- confirm intended behavior.
        return Outcome::FrameVideo;
    } else if (packet.stream_index == audioStreamIndex) {
        av_packet_unref(&packet); // audio is not decoded yet; drop the data
        std::cout << "An audio frame" << std::endl;
        return Outcome::FrameAudio;
    } else {
        av_packet_unref(&packet);
        LogWarn() << "Invalid stream index";
        return Outcome::Error;
    }
}

uint8_t *dataU = frame->data[1];
int linesizeU = frame->linesize[1];
/// Convert a decoded YUV420p frame into a tightly packed RGBA8 buffer
/// using an integer BT.601-style conversion.
/// @param frame decoded frame; expected to be AV_PIX_FMT_YUV420P (the
///        pixel format is checked once in init())
/// @return w*h*4 bytes, row-major, alpha forced to 255
std::vector<uint8_t> yuv420pToRGBA(const AVFrameRefPtr &frame)
{
    const int w = videoCodecContext->width;
    const int h = videoCodecContext->height;

    std::vector<uint8_t> data;
    data.reserve(static_cast<size_t>(w) * h * 4); // 4 bytes per pixel

    auto clamp = [](int x){ return std::min(255, std::max(0, x)); };

    // Y is full resolution; U and V are subsampled 2x2 in YUV420p.
    uint8_t *dataY = frame->data[0];
    int linesizeY = frame->linesize[0];

    uint8_t *dataU = frame->data[1];
    int linesizeU = frame->linesize[1];

    uint8_t *dataV = frame->data[2];
    int linesizeV = frame->linesize[2];

    for (int y = 0; y < h; ++y)
        for (int x = 0; x < w; ++x) {
            int Y = dataY[y*linesizeY+x];
            int U = dataU[y/2*linesizeU+x/2];
            int V = dataV[y/2*linesizeV+x/2];

            // Fixed-point YUV -> RGB; +128 rounds before the >>8 divide.
            int C = Y - 16;
            int D = U - 128;
            int E = V - 128;

            int R = clamp( (298*C + 409*E + 128 ) >> 8 );
            int G = clamp( (298*C - 100*D - 208*E + 128 ) >> 8 );
            int B = clamp( (298*C + 516*D + 128 ) >> 8 );

            data.push_back(R);
            data.push_back(G);
            data.push_back(B);
            data.push_back(255); // opaque alpha
        }

    return data;
}

for (int y = 0; y < h; ++y)
for (int x = 0; x < w; ++x) {
int Y = dataY[y*linesizeY+x];
int U = dataU[y/2*linesizeU+x/2];
int V = dataV[y/2*linesizeV+x/2];
void displayVideoFrame(const AVFrameRefPtr &frame)
{
// TODO: this can be probably accelerated by doing conversion within the fragment shader
auto data = yuv420pToRGBA(frame);

int C = Y - 16;
int D = U - 128;
int E = V - 128;
view->setHidden(false);
view->setSize(Math::float2(1,1));

int R = clamp( (298*C + 409*E + 128 ) >> 8 );
int G = clamp( (298*C - 100*D - 208*E + 128 ) >> 8 );
int B = clamp( (298*C + 516*D + 128 ) >> 8 );
Textures::TextureAllocator& alloc = engine.getEngineTextureAlloc();

data.push_back(R);
data.push_back(G);
data.push_back(B);
data.push_back(255);
}
if (!texture.isValid())
texture = alloc.loadTextureRGBA8(data, videoCodecContext->width, videoCodecContext->height);
else
std::swap(alloc.getTexture(texture).imageData, data);
alloc.asyncFinalizeLoad(texture);
view->setImage(texture, videoCodecContext->width, videoCodecContext->height);
}

if (!texture.isValid())
texture = alloc.loadTextureRGBA8(data, videoCodecContext->width, videoCodecContext->height);
else
std::swap(alloc.getTexture(texture).imageData, data);
alloc.asyncFinalizeLoad(texture);
view->setImage(texture, videoCodecContext->width, videoCodecContext->height);
}
} else if(packet.stream_index == audioStreamIndex) {
std::cout << "An audio frame" << std::endl;
} else {
LogWarn() << "Invalid stream index";
public:
/// Advance the playback clock and present a queued frame when its
/// presentation time arrives.
/// @param dt seconds elapsed since the previous call
/// @return false once no further video frame can be decoded
bool update(double dt)
{
    // The clock is kept in stream time-base units so it compares
    // directly against frame pts values.
    currentTime += dt/videoTimeBase;

    assert(!videoFrames.empty());
    assert(videoFrames.front());

    if (currentTime < videoFrames.front()->pts)
        return true; // not yet time for the queued frame

    AVFrameRefPtr frame;
    videoFrames.front().swap(frame);
    videoFrames.pop();
    std::cout << "Displaying frame " << frame->display_picture_number << " " << frame->pts << std::endl;

    displayVideoFrame(frame);

    // Refill the queue so the next call has a frame to wait on.
    return nextVideoFrame() == Outcome::FrameVideo;
}

bool initialized() const {
Expand All @@ -258,7 +324,8 @@ namespace Media {
// Demuxer context for the opened media file.
AVFormatContextPtr formatContext;
// Indices of the best video/audio streams found by av_find_best_stream.
int videoStreamIndex, audioStreamIndex;
AVCodecContextPtr videoCodecContext, audioCodecContext;
// NOTE(review): appears to be pre-change residue in this diff view --
// the queue below replaced the single frame; confirm it was removed.
AVFramePtr frame;
// Decoded video frames waiting for their presentation time.
std::queue<AVFrameRefPtr> videoFrames;
// Stream time bases (seconds per pts unit, from av_q2d) and the
// playback clock, accumulated in video time-base units.
double videoTimeBase = 0, audioTimeBase = 0, currentTime = 0;
// Reused packet buffer filled by av_read_frame().
AVPacket packet;
Handle::TextureHandle texture;
UI::ImageView *view;
Expand Down Expand Up @@ -292,7 +359,8 @@ void Media::VideoPlayer::frameUpdate(double dt, uint16_t width, uint16_t height)
return;
}

currentVideo->nextFrame();
if (!currentVideo->update(dt))
currentVideo = nullptr;
#endif
}

Expand Down

0 comments on commit b484eae

Please sign in to comment.