[Bf-blender-cvs] [71c7f3d0123] cycles_oneapi: FFmpeg: Add VFR media support

Wed Jun 29 10:38:37 CEST 2022

Commit: 71c7f3d0123144372991819337708ef493fb665c
Author: Richard Antalik
Date:   Mon Jun 27 16:16:21 2022 +0200
Branches: cycles_oneapi
https://developer.blender.org/rB71c7f3d0123144372991819337708ef493fb665c

FFmpeg: Add VFR media support

Variable frame rate (VFR) files have been difficult to work with.
This is because during sequential decoding, the spacing between frames
is not always equal, but it was assumed to be equal. This can result in
the movie getting out of sync with sound and in differences between the
preview and the rendered image. A way to resolve these issues was to
build and use timecodes, which is quite a lengthy and resource-intensive
process. Such issues are also difficult to communicate through the UI
because it is not possible to predict whether timecode usage will be
needed.

With this patch, a double buffer is used to keep the previously decoded
frame. If the current frame has a PTS greater than the one we are looking
for, it is not time to display it yet, and the previous frame is
displayed instead.

Each `AVFrame` has information about its duration, so in theory double
buffering would not be needed, but in practice this information is
unreliable.

To ensure the double buffer is always used, the function
`ffmpeg_decode_video_frame_scan` is used for sequential decoding, even
if no scanning is expected.

This approach is similar to D6392, but this implementation does not
require seeking so it is much faster. Currently `AVFrame` is only
referenced, so no data is copied and therefore no overhead is added.

Note: There is one known issue where seeking fails even with double
buffering: some files may seek too far in the stream and miss the
requested PTS. These require preseeking or a greater negative subframe
offset.

Fixes: T86361, T72347

Reviewed By: zeddb, sergey

Differential Revision: https://developer.blender.org/D13583

===================================================================

M	source/blender/imbuf/intern/IMB_anim.h
M	source/blender/imbuf/intern/anim_movie.c

===================================================================

diff --git a/source/blender/imbuf/intern/IMB_anim.h b/source/blender/imbuf/intern/IMB_anim.h
index e99572adbb0..0ac1d7bfb74 100644
--- a/source/blender/imbuf/intern/IMB_anim.h
+++ b/source/blender/imbuf/intern/IMB_anim.h
@@ -109,17 +109,22 @@ struct anim {
   AVFormatContext *pFormatCtx;
   AVCodecContext *pCodecCtx;
   const AVCodec *pCodec;
-  AVFrame *pFrame;
-  int pFrameComplete;
   AVFrame *pFrameRGB;
   AVFrame *pFrameDeinterlaced;
   struct SwsContext *img_convert_ctx;
   int videoStream;
 
+  AVFrame *pFrame;
+  bool pFrame_complete;
+  AVFrame *pFrame_backup;
+  bool pFrame_backup_complete;
+
   struct ImBuf *cur_frame_final;
   int64_t cur_pts;
   int64_t cur_key_frame_pts;
   AVPacket *cur_packet;
+
+  bool seek_before_decode;
 #endif
 
   char index_dir[768];
diff --git a/source/blender/imbuf/intern/anim_movie.c b/source/blender/imbuf/intern/anim_movie.c
index 0052ce19aa1..12ba6a0217d 100644
--- a/source/blender/imbuf/intern/anim_movie.c
+++ b/source/blender/imbuf/intern/anim_movie.c
@@ -675,7 +675,7 @@ static int startffmpeg(struct anim *anim)
   anim->orientation = 0;
   anim->framesize = anim->x * anim->y * 4;
 
-  anim->cur_position = -1;
+  anim->cur_position = 0;
   anim->cur_frame_final = 0;
   anim->cur_pts = -1;
   anim->cur_key_frame_pts = -1;
@@ -683,7 +683,9 @@ static int startffmpeg(struct anim *anim)
   anim->cur_packet->stream_index = -1;
 
   anim->pFrame = av_frame_alloc();
-  anim->pFrameComplete = false;
+  anim->pFrame_backup = av_frame_alloc();
+  anim->pFrame_backup_complete = false;
+  anim->pFrame_complete = false;
   anim->pFrameDeinterlaced = av_frame_alloc();
   anim->pFrameRGB = av_frame_alloc();
   anim->pFrameRGB->format = AV_PIX_FMT_RGBA;
@@ -698,6 +700,7 @@ static int startffmpeg(struct anim *anim)
     av_frame_free(&anim->pFrameRGB);
     av_frame_free(&anim->pFrameDeinterlaced);
     av_frame_free(&anim->pFrame);
+    av_frame_free(&anim->pFrame_backup);
     anim->pCodecCtx = NULL;
     return -1;
   }
@@ -710,6 +713,7 @@ static int startffmpeg(struct anim *anim)
     av_frame_free(&anim->pFrameRGB);
     av_frame_free(&anim->pFrameDeinterlaced);
     av_frame_free(&anim->pFrame);
+    av_frame_free(&anim->pFrame_backup);
     anim->pCodecCtx = NULL;
     return -1;
   }
@@ -747,6 +751,7 @@ static int startffmpeg(struct anim *anim)
     av_frame_free(&anim->pFrameRGB);
     av_frame_free(&anim->pFrameDeinterlaced);
     av_frame_free(&anim->pFrame);
+    av_frame_free(&anim->pFrame_backup);
     anim->pCodecCtx = NULL;
     return -1;
   }
@@ -781,22 +786,71 @@ static int startffmpeg(struct anim *anim)
   return 0;
 }
 
+static double ffmpeg_steps_per_frame_get(struct anim *anim)
+{
+  AVStream *v_st = anim->pFormatCtx->streams[anim->videoStream];
+  AVRational time_base = v_st->time_base;
+  AVRational frame_rate = av_guess_frame_rate(anim->pFormatCtx, v_st, NULL);
+  return av_q2d(av_inv_q(av_mul_q(frame_rate, time_base)));
+}
+
+/* Store backup frame.
+ * With VFR movies, if PTS is not matched perfectly, scanning continues to look for next PTS.
+ * It is likely to overshoot and scanning stops. Having previous frame backed up, it is possible
+ * to use it when overshoot happens.
+ */
+static void ffmpeg_double_buffer_backup_frame_store(struct anim *anim, int64_t pts_to_search)
+{
+  /* `anim->pFrame` is beyond `pts_to_search`. Don't store it. */
+  if (anim->pFrame_backup_complete && anim->cur_pts >= pts_to_search) {
+    return;
+  }
+  if (!anim->pFrame_complete) {
+    return;
+  }
+
+  if (anim->pFrame_backup_complete) {
+    av_frame_unref(anim->pFrame_backup);
+  }
+
+  av_frame_move_ref(anim->pFrame_backup, anim->pFrame);
+  anim->pFrame_backup_complete = true;
+}
+
+/* Free stored backup frame. */
+static void ffmpeg_double_buffer_backup_frame_clear(struct anim *anim)
+{
+  if (anim->pFrame_backup_complete) {
+    av_frame_unref(anim->pFrame_backup);
+  }
+  anim->pFrame_backup_complete = false;
+}
+
+/* Return recently decoded frame. If it does not exist, return frame from backup buffer. */
+static AVFrame *ffmpeg_double_buffer_frame_fallback_get(struct anim *anim)
+{
+  av_log(anim->pFormatCtx, AV_LOG_ERROR, "DECODE UNHAPPY: PTS not matched!\n");
+
+  if (anim->pFrame_complete) {
+    return anim->pFrame;
+  }
+  if (anim->pFrame_backup_complete) {
+    return anim->pFrame_backup;
+  }
+  return NULL;
+}
+
 /* postprocess the image in anim->pFrame and do color conversion
  * and deinterlacing stuff.
  *
  * Output is anim->cur_frame_final
  */
 
-static void ffmpeg_postprocess(struct anim *anim)
+static void ffmpeg_postprocess(struct anim *anim, AVFrame *input)
 {
-  AVFrame *input = anim->pFrame;
   ImBuf *ibuf = anim->cur_frame_final;
   int filter_y = 0;
 
-  if (!anim->pFrameComplete) {
-    return;
-  }
-
   /* This means the data wasn't read properly,
    * this check stops crashing */
   if (input->data[0] == 0 && input->data[1] == 0 && input->data[2] == 0 && input->data[3] == 0) {
@@ -808,7 +862,7 @@ static void ffmpeg_postprocess(struct anim *anim)
 
   av_log(anim->pFormatCtx,
          AV_LOG_DEBUG,
-         "  POSTPROC: anim->pFrame planes: %p %p %p %p\n",
+         "  POSTPROC: AVFrame planes: %p %p %p %p\n",
          input->data[0],
          input->data[1],
          input->data[2],
@@ -852,6 +906,52 @@ static void ffmpeg_postprocess(struct anim *anim)
   }
 }
 
+static void final_frame_log(struct anim *anim,
+                            int64_t frame_pts_start,
+                            int64_t frame_pts_end,
+                            const char *str)
+{
+  av_log(anim->pFormatCtx,
+         AV_LOG_INFO,
+         "DECODE HAPPY: %s frame PTS range %" PRId64 " - %" PRId64 ".\n",
+         str,
+         frame_pts_start,
+         frame_pts_end);
+}
+
+static bool ffmpeg_pts_isect(int64_t pts_start, int64_t pts_end, int64_t pts_to_search)
+{
+  return pts_start <= pts_to_search && pts_to_search < pts_end;
+}
+
+/* Return frame that matches `pts_to_search`, NULL if matching frame does not exist. */
+static AVFrame *ffmpeg_frame_by_pts_get(struct anim *anim, int64_t pts_to_search)
+{
+  /* NOTE: `frame->pts + frame->pkt_duration` does not always match pts of next frame.
+   * See footage from T86361. Here it is OK to use, because PTS must match current or backup frame.
+   * If there is no current frame, return NULL.
+   */
+  if (!anim->pFrame_complete) {
+    return NULL;
+  }
+
+  const bool backup_frame_ready = anim->pFrame_backup_complete;
+  const int64_t recent_start = av_get_pts_from_frame(anim->pFrame);
+  const int64_t recent_end = recent_start + anim->pFrame->pkt_duration;
+  const int64_t backup_start = backup_frame_ready ? av_get_pts_from_frame(anim->pFrame_backup) : 0;
+
+  AVFrame *best_frame = NULL;
+  if (ffmpeg_pts_isect(recent_start, recent_end, pts_to_search)) {
+    final_frame_log(anim, recent_start, recent_end, "Recent");
+    best_frame = anim->pFrame;
+  }
+  else if (backup_frame_ready && ffmpeg_pts_isect(backup_start, recent_start, pts_to_search)) {
+    final_frame_log(anim, backup_start, recent_start, "Backup");
+    best_frame = anim->pFrame_backup;
+  }
+  return best_frame;
+}
+
 static void ffmpeg_decode_store_frame_pts(struct anim *anim)
 {
   anim->cur_pts = av_get_pts_from_frame(anim->pFrame);
@@ -863,7 +963,7 @@ static void ffmpeg_decode_store_frame_pts(struct anim *anim)
   av_log(anim->pFormatCtx,
          AV_LOG_DEBUG,
          "  FRAME DONE: cur_pts=%" PRId64 ", guessed_pts=%" PRId64 "\n",
-         (anim->pFrame->pts == AV_NOPTS_VALUE) ? -1 : (int64_t)anim->pFrame->pts,
+         av_get_pts_from_frame(anim->pFrame),
          (int64_t)anim->cur_pts);
 }
 
@@ -888,8 +988,8 @@ static int ffmpeg_decode_video_frame(struct anim *anim)
 
   /* Sometimes, decoder returns more than one frame per sent packet. Check if frames are available.
    * This frames must be read, otherwise decoding will fail. See T91405. */
-  anim->pFrameComplete = avcodec_receive_frame(anim->pCodecCtx, anim->pFrame) == 0;
-  if (anim->pFrameComplete) {
+  anim->pFrame_complete = avcodec_receive_frame(anim->pCodecCtx, anim->pFrame) == 0;
+  if (anim->pFrame_complete) {
     av_log(anim->pFormatCtx, AV_LOG_DEBUG, "  DECODE FROM CODEC BUFFER\n");
     ffmpeg_decode_store_frame_pts(anim);
     return 1;
@@ -902,20 +1002,22 @@ static int ffmpeg_decode_video_frame(struct anim *anim)
   }
 
   while ((rval = ffmpeg_read_video_frame(anim, anim->cur_packet)) >= 0) {
+    if (anim->cur_packet->stream_index != anim->videoStream) {
+      continue;
+    }
+
     av_log(anim->pFormatCtx,
            AV_LOG_DEBUG,
-           "%sREAD: strID=%d (VID: %d) dts=%" PRId64 " pts=%" PRId64 " %s\n",
-           (anim->cur_packet->stream_index == anim->videoStream) ? "->" : "  ",
+           "READ: strID=%d dts=%" PRId64 " pts=%" PRId64 " %s\n",
            anim->cur_packet->stream_index,
-           anim->videoStream,
            (anim->cur_packet->dts == AV_NOPTS_VALUE) ? -1 : (int64_t)anim->cur_packet->dts,
            (anim->cur_packet->pts == AV_NOPTS_VALUE) ? -1 : (int64_t)anim->cur_packet->pts,
            (anim->cur_packet->flags & AV_PKT_FLAG_KEY) ? " KEY" : "");
 
     avcodec_send_packet(anim->pCodecCtx, anim->cur_packet);
-    anim->pFrameComplete = avcodec_receive_frame(anim->pCodecCtx, anim->pFrame) == 0;
+    anim->pFrame_complete = avcodec_receive_frame(anim->pCodecCtx, anim->pFrame) == 0;
 
-    if (anim->pFrameComplete) {
+    if (anim->pFrame_complete) {
       ffmpeg_decode_store_frame_pts(anim);
       break;
     }
@@ -926,9 +1028,9 @@ static int ffmpeg_decode_video_frame(struct anim *anim)
   if (rval == AVERROR_EOF) {
     /* Flush any remaining frames out of the decoder. */
     avcodec_send_packet(anim->pCodecCtx, NULL);
-    anim->pFrameComplete = avcodec_receive_frame(anim->pCodecCtx, anim->pFrame) == 0;
+    anim->pFrame_complete = avcodec_receive_frame(anim->pCodecCtx, anim->pFrame) == 0;
 
-    if (anim->pFrameComplete) {
+    if (anim->pFrame_complete) {
       ffmpeg_decode_store_frame_pts(anim);
       rval = 0;
     }
@@ -990,15 +1092,6 @@ static int ffmpeg_seek_by_byte(AVFormatContext *pFormatCtx)
   return false;
 }
 
-static double ffmpeg_steps_per_frame_get(struct anim *anim)
-{
-  AVStream *v_st = anim->pFormatCtx->streams[anim->videoStre

@@ Diff output truncated at 10240 characters. @@