Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract embedded captions while processing file #141

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 38 additions & 3 deletions lib/active_encode/engine_adapters/ffmpeg_adapter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def create(input_url, options = {})
# Create a working directory that holds all output files related to the encode
FileUtils.mkdir_p working_path("", new_encode.id)
FileUtils.mkdir_p working_path("outputs", new_encode.id)
FileUtils.mkdir_p working_path("supplemental_files", new_encode.id)

# Extract technical metadata from input file
curl_option = if options && options[:headers]
Expand Down Expand Up @@ -91,6 +92,8 @@ def create(input_url, options = {})
new_encode.input.duration = fixed_duration(working_path("duration_input_metadata", new_encode.id))
end

options[:subtitle_count] = new_encode.input.subtitle_count if new_encode.input.subtitle_count&.positive?

new_encode.state = :running
new_encode.percent_complete = 1
new_encode.errors = []
Expand Down Expand Up @@ -150,6 +153,7 @@ def find(id, opts = {})
end

encode.output = build_outputs encode if encode.completed?
encode.output += build_supplemental_outputs encode if encode.completed?

encode
end
Expand Down Expand Up @@ -251,17 +255,46 @@ def build_outputs(encode)
outputs
end

# Builds one ActiveEncode::Output per entry found directly inside the encode's
# "supplemental_files" working directory.
#
# @param encode [#id, #input, #created_at] encode whose supplemental files are listed
# @return [Array<ActiveEncode::Output>] one output per matched entry; empty when nothing matches
def build_supplemental_outputs(encode)
  supplemental_dir = File.absolute_path(working_path('supplemental_files', encode.id))

  Dir["#{supplemental_dir}/*"].map do |path|
    ActiveEncode::Output.new.tap do |output|
      output.url = "file://#{path}"
      # The output id is the input id plus the file's base name.
      output.id = "#{encode.input.id}-#{File.basename(path)}"
      output.created_at = encode.created_at
      output.updated_at = File.mtime path
    end
  end
end

# Builds the ffmpeg command line for an encode.
#
# @param input_url [String] location of the input, passed to ffmpeg via -i
# @param id [String] encode id used to resolve paths through working_path
# @param opts [Hash] options:
#   :outputs        - array of hashes with :label, :ffmpeg_opt and :extension (required)
#   :subtitle_count - when positive, caption extraction options are added
#   :headers        - optional key/value pairs rendered into a single -headers argument
# @return [String] the full command string
#
# NOTE(review): input_url, header values and output paths are interpolated without escaping;
# confirm callers only pass trusted values if this string is handed to a shell.
def ffmpeg_command(input_url, id, opts)
  # Computed once; the same base name is shared by every output and by the caption files.
  sanitized_filename = ActiveEncode.sanitize_base input_url

  output_opt = opts[:outputs].map do |output|
    file_name = "outputs/#{sanitized_filename}-#{output[:label]}.#{output[:extension]}"
    " #{output[:ffmpeg_opt]} #{working_path(file_name, id)}"
  end.join(" ")

  # Stays nil (and interpolates as an empty string) when there are no subtitle streams.
  supplemental_file_opt = caption_extraction_options(sanitized_filename, opts[:subtitle_count], id) if opts[:subtitle_count]&.positive?

  header_opt = Array(opts[:headers]).map do |k, v|
    "#{k}: #{v}\r\n"
  end.join
  header_opt = "-headers '#{header_opt}'" if header_opt.present?

  # Caption extraction options are placed ahead of the regular output options.
  "#{FFMPEG_PATH} #{header_opt} -y -loglevel level+fatal -progress #{working_path('progress', id)} -i \"#{input_url}\" #{supplemental_file_opt} #{output_opt}"
end

# Assembles the ffmpeg arguments that write each subtitle stream of the input
# to its own WebVTT file under "supplemental_files".
#
# @param filename [String] base name used for the generated caption files
# @param count [Integer] number of subtitle streams; indexes 0 through count - 1 are mapped
# @param id encode id, forwarded to working_path to resolve each caption path
# @return [String] the concatenated options, each with a leading space ("" when count is 0)
def caption_extraction_options(filename, count, id)
  (0..count - 1).map do |stream_index|
    caption_path = working_path("supplemental_files/#{filename}-caption#{stream_index}.vtt", id)
    " -map 0:s:#{stream_index} -c:s webvtt #{caption_path}"
  end.join
end

def get_pid(id)
Expand Down Expand Up @@ -314,6 +347,7 @@ def get_tech_metadata(file_path)
doc.remove_namespaces!
duration = get_xpath_text(doc, '//Duration/text()', :to_f)
duration *= 1000 unless duration.nil? # Convert to milliseconds
subtitle_count = doc.xpath('//track[@type="Text"]').length
{ url: get_xpath_text(doc, '//media/@ref', :to_s),
width: get_xpath_text(doc, '//Width/text()', :to_f),
height: get_xpath_text(doc, '//Height/text()', :to_f),
Expand All @@ -323,7 +357,8 @@ def get_tech_metadata(file_path)
audio_codec: get_xpath_text(doc, '//track[@type="Audio"]/CodecID/text()', :to_s),
audio_bitrate: get_xpath_text(doc, '//track[@type="Audio"]/BitRate/text()', :to_i),
video_codec: get_xpath_text(doc, '//track[@type="Video"]/CodecID/text()', :to_s),
video_bitrate: get_xpath_text(doc, '//track[@type="Video"]/BitRate/text()', :to_i) }
video_bitrate: get_xpath_text(doc, '//track[@type="Video"]/BitRate/text()', :to_i),
subtitle_count: subtitle_count }
end

def get_xpath_text(doc, xpath, cast_method)
Expand Down
6 changes: 5 additions & 1 deletion lib/active_encode/engine_adapters/ffmpeg_adapter/cleaner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,11 @@ def remove_files(files, older_than)
# Removes the empty directories among +directories+, plus any parent directory whose
# only children are "outputs" / "supplemental_files" directories already selected for removal.
#
# @param directories [Array<String>] candidate directory paths
def remove_empty_directories(directories)
directories_to_delete = directories.select { |d| Dir.empty?(d) }
non_empty_directories = directories - directories_to_delete
# A directory whose sole child is an "outputs" directory that is itself selected is selected too.
directories_to_delete += non_empty_directories.select { |ned| Dir.children(ned) == ["outputs"] && directories_to_delete.include?(File.join(ned, "outputs")) }
# Same rule for a directory holding exactly "outputs" and "supplemental_files", both selected.
# The children are sorted so the comparison does not depend on the order Dir.children returns them.
# This check reads directories_to_delete after the previous step has extended it.
directories_to_delete += non_empty_directories.select do |ned|
Dir.children(ned).sort == ["outputs", "supplemental_files"] &&
directories_to_delete.include?(File.join(ned, "outputs")) &&
directories_to_delete.include?(File.join(ned, "supplemental_files"))
end
# Parents were appended after the directories selected first, so they come later in the list.
FileUtils.rmdir(directories_to_delete) unless directories_to_delete.empty?
end

Expand Down
4 changes: 3 additions & 1 deletion lib/active_encode/technical_metadata.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@ module TechnicalMetadata
attr_accessor :video_codec
attr_accessor :audio_bitrate
attr_accessor :video_bitrate

attr_accessor :subtitle_count
end

# Copies the recognised technical-metadata fields from +metadata+ onto this object
# by calling the matching writer (for example +width=+).
#
# Only keys present in +metadata+ are assigned, so a key explicitly mapped to nil
# clears the attribute while an absent key leaves it untouched. Unknown keys are ignored.
#
# @param metadata [Hash{Symbol => Object}] values keyed by field name
def assign_tech_metadata(metadata)
  [:width, :height, :frame_rate, :duration, :file_size, :checksum,
   :audio_codec, :video_codec, :audio_bitrate, :video_bitrate, :subtitle_count].each do |field|
    send("#{field}=", metadata[field]) if metadata.key?(field)
  end
end
Expand Down
Binary file added spec/fixtures/file_with_embedded_captions.mp4
Binary file not shown.
40 changes: 40 additions & 0 deletions spec/integration/ffmpeg_adapter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,42 @@ def find_encode(id)
end
end

# Creates an encode from the file_with_embedded_captions.mp4 fixture and checks the created job.
context 'file with embedded captions' do
let(:file_with_embedded_captions) { "file://" + Rails.root.join('..', 'spec', 'fixtures', 'file_with_embedded_captions.mp4').to_s }
# let! creates the job eagerly, before each example runs.
let!(:create_embedded_captions_job) { ActiveEncode::Base.create(file_with_embedded_captions, outputs: [{ label: "low", ffmpeg_opt: "-s 640x480", extension: 'mp4' }]) }
let(:find_embedded_captions_job) { ActiveEncode::Base.find create_embedded_captions_job.id }

it "does not have errors" do
# NOTE(review): presumably gives the encode time to progress before errors are read — confirm.
sleep 2
expect(find_embedded_captions_job.errors).to be_empty
end

it "has the input technical metadata in a file" do
expect(File.read("#{work_dir}/#{create_embedded_captions_job.id}/input_metadata")).not_to be_empty
end

it "has the pid in a file" do
expect(File.read("#{work_dir}/#{create_embedded_captions_job.id}/pid")).not_to be_empty
end

# Repeats the same three checks with the input URL passed through Addressable::URI.encode.
context 'when uri encoded' do
let(:file_with_embedded_captions) { Addressable::URI.encode("file://" + Rails.root.join('..', 'spec', 'fixtures', 'file_with_embedded_captions.mp4').to_s) }

it "does not have errors" do
sleep 2
expect(find_embedded_captions_job.errors).to be_empty
end

it "has the input technical metadata in a file" do
expect(File.read("#{work_dir}/#{create_embedded_captions_job.id}/input_metadata")).not_to be_empty
end

it "has the pid in a file" do
expect(File.read("#{work_dir}/#{create_embedded_captions_job.id}/pid")).not_to be_empty
end
end
end

context 'when failed' do
subject { created_job }

Expand Down Expand Up @@ -505,6 +541,10 @@ def find_encode(id)
end

context ":all" do
# The cleaner removes empty directories even if they are younger than the defined :older_than param.
# We need to use a file that will populate the supplemental files directory for proper testing of :all behavior.
let(:file) { "file://" + Rails.root.join('..', 'spec', 'fixtures', 'file_with_embedded_captions.mp4').to_s }

it "deletes all files and directories older than 2 weeks" do
sleep 1
travel 3.weeks do
Expand Down