class FormatParser::MP3Parser

Constants

MAGIC_BE
MAGIC_LE
MAX_FRAMES_TO_SCAN

We limit the number of MPEG frames we scan to obtain our duration estimation

MP3_MIME_TYPE
PNG_HEADER_BYTES
SAMPLES_PER_FRAME

Default frame size for mp3

TIFF_HEADER_BYTES
ZIP_LOCAL_ENTRY_SIGNATURE

For some edge cases

Public Instance Methods

call(raw_io) click to toggle source
# File lib/parsers/mp3_parser.rb, line 61
# Attempts to parse the given IO as an MP3 file.
#
# The input is rejected (nil is returned) when it starts with a ZIP local entry,
# PNG or TIFF signature, when no MPEG frames could be parsed, or when the
# estimated duration comes out as infinite. Otherwise the ID3 tags are read,
# the first MPEG frames are scanned, and the duration is taken from the Xing
# header if a usable one was found, or estimated from the scanned frames.
#
# raw_io - the IO-like object to parse; it gets wrapped in a FormatParser::IOConstraint
#
# Returns a FormatParser::Audio describing the file, or nil if the input is not accepted.
def call(raw_io)
  io = FormatParser::IOConstraint.new(raw_io)

  # Special case: some ZIPs (Office documents) did detect as MP3s.
  # To avoid having that happen, we check for the PKZIP signature -
  # local entry header signature - at the very start of the file.
  # If the file is too small safe_read will fail too and the parser
  # will terminate here. Same with PNGs. In the future
  # we should implement "confidence" for MP3 as of all our formats
  # it is by far the most lax.
  header = safe_read(io, 8)
  return if header.start_with?(ZIP_LOCAL_ENTRY_SIGNATURE)
  return if header.start_with?(PNG_HEADER_BYTES)

  # Same idea for TIFF: bail out if the first 4 bytes are one of the known TIFF headers
  io.seek(0)
  return if TIFF_HEADER_BYTES.include?(safe_read(io, 4))

  # Read all the ID3 tags (or at least attempt to)
  io.seek(0)
  id3v1 = ID3Extraction.attempt_id3_v1_extraction(io)
  tags = [id3v1, ID3Extraction.attempt_id3_v2_extraction(io)].compact

  # No tags were found - start looking for MPEG frames from the very beginning
  io.seek(0) if tags.empty?

  # Compute how many bytes are occupied by the actual MPEG frames
  # (128 bytes at the tail are skipped when an ID3v1 tag was extracted).
  # NOTE(review): ignore_bytes_at_head relies on the ID3 extraction leaving the IO
  # positioned right after the leading tag - confirm against ID3Extraction.
  ignore_bytes_at_tail = id3v1 ? 128 : 0
  ignore_bytes_at_head = io.pos
  bytes_used_by_frames = io.size - ignore_bytes_at_head - ignore_bytes_at_tail

  io.seek(ignore_bytes_at_head)

  maybe_xing_header, initial_frames = parse_mpeg_frames(io)

  # Without a single parsed frame there is nothing to report on
  return if initial_frames.empty?

  first_frame = initial_frames.first

  id3tags_hash = with_id3tag_local_configs { blend_id3_tags_into_hash(*tags) }

  file_info = FormatParser::Audio.new(
    format: :mp3,
    # media_duration_frames is omitted because the frames
    # in MPEG are not the same thing as in a movie file - they
    # do not tell anything of substance
    num_audio_channels: first_frame.channels,
    audio_sample_rate_hz: first_frame.sample_rate,
    intrinsics: id3tags_hash.merge(id3tags: tags),
    content_type: MP3_MIME_TYPE,
  )

  # Title, album and artist (when present in the tags) are assigned
  # through the matching setters on the result object
  extra_file_attirbutes = fetch_extra_attributes_from_id3_tags(id3tags_hash)

  extra_file_attirbutes.each do |name, value|
    file_info.send(:"#{name}=", value)
  end

  # A usable Xing header carries the total frame count, so the duration
  # can be computed directly without estimating. _bit_rate is computed
  # but not used any further.
  if maybe_xing_header
    duration = maybe_xing_header.frames * SAMPLES_PER_FRAME / first_frame.sample_rate.to_f
    _bit_rate = maybe_xing_header.byte_count * 8 / duration / 1000
    file_info.media_duration_seconds = duration
    return file_info
  end

  # Estimate duration using the frames we did parse - to have an exact one
  # we would need to have all the frames and thus read most of the file
  # (_avg_bitrate is computed but not used any further)
  _avg_bitrate = float_average_over(initial_frames, :frame_bitrate)
  avg_frame_size = float_average_over(initial_frames, :frame_length)
  avg_sample_rate = float_average_over(initial_frames, :sample_rate)

  est_frame_count = bytes_used_by_frames / avg_frame_size
  est_samples = est_frame_count * SAMPLES_PER_FRAME
  est_duration_seconds = est_samples / avg_sample_rate

  # Safeguard for i.e. some JPEGs being recognized as MP3
  # to prevent ambiguous recognition
  # NOTE(review): only positive infinity is rejected here; a NaN estimate
  # (0 bytes divided by a 0.0 average frame size) would pass through - confirm
  # whether that can happen in practice.
  return if est_duration_seconds == Float::INFINITY

  file_info.media_duration_seconds = est_duration_seconds
  file_info
end
likely_match?(filename) click to toggle source
# File lib/parsers/mp3_parser.rb, line 57
# Tells whether the given filename looks like an MP3 file, judging by its extension.
#
# The pattern is anchored with \z (end of string) rather than $ (end of line),
# so a name such as "song.mp3\nsomething.exe" is not mistaken for an MP3.
#
# filename - the file name to examine
#
# Returns the offset of the ".mp3" extension (truthy) when the name ends
# with it (case-insensitively), nil otherwise.
def likely_match?(filename)
  filename =~ /\.mp3\z/i
end

Private Instance Methods

attempt_xing_header(frame_body) click to toggle source
# File lib/parsers/mp3_parser.rb, line 253
# Looks for a Xing VBR header inside the body of an MPEG frame and decodes it.
#
# The layout after the "Xing" marker is a 32-bit big-endian flags word followed
# by the optional fields the flags announce, in this order: frame count (flag 1),
# byte count (flag 2), 100-byte table of contents (flag 4), VBR scale (flag 8).
# See https://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header#XINGHeader
#
# Truncated data is tolerated: a field that cannot be read in full is left nil,
# and if even the flags word is missing no header is returned.
#
# frame_body - a String with the bytes of the frame, or nil when nothing could be read
#
# Returns a VBRHeader, or nil if the body contains no (readable) Xing header.
def attempt_xing_header(frame_body)
  return unless frame_body

  xing_offset = frame_body.index('Xing')
  return unless xing_offset # No Xing in this frame

  io = StringIO.new(frame_body)
  io.seek(xing_offset + 4) # Include the length of "Xing" itself

  # Reads a big-endian 32-bit integer; yields nil when fewer than 4 bytes are left
  read_uint32 = -> { io.read(4).to_s.unpack('N1').first }

  header_flags = read_uint32.call
  return unless header_flags # The frame ends right after the marker

  frames = byte_count = toc = vbr_scale = nil

  frames = read_uint32.call if header_flags & 1 != 0       # FRAMES FLAG

  byte_count = read_uint32.call if header_flags & 2 != 0   # BYTES FLAG

  if header_flags & 4 != 0                                 # TOC FLAG
    toc_bytes = io.read(100)
    toc = toc_bytes.unpack('C100') if toc_bytes && toc_bytes.bytesize == 100
  end

  vbr_scale = read_uint32.call if header_flags & 8 != 0    # VBR SCALE FLAG

  VBRHeader.new(frames: frames, byte_count: byte_count, toc_entries: toc, vbr_scale: vbr_scale)
end
average_bytes_and_bitrate(_mpeg_frames) click to toggle source
# File lib/parsers/mp3_parser.rb, line 276
# Computes the average frame length and the average bitrate over the given frames.
#
# mpeg_frames - a list of objects responding to frame_length and frame_bitrate
#
# Returns a two-element Array: [average bytes per frame, average bitrate per frame],
# both as Floats. For an empty list both averages are NaN (0 divided by 0.0).
def average_bytes_and_bitrate(mpeg_frames)
  frame_count = mpeg_frames.length.to_f
  avg_bytes_per_frame = mpeg_frames.sum(&:frame_length) / frame_count
  avg_bitrate_per_frame = mpeg_frames.sum(&:frame_bitrate) / frame_count
  [avg_bytes_per_frame, avg_bitrate_per_frame]
end
blend_id3_tags_into_hash(*tags) click to toggle source
# File lib/parsers/mp3_parser.rb, line 296
# Folds the given ID3 tags into a single Hash.
#
# Each tag is wrapped in a TagWrapper and converted with to_h; the resulting
# hashes are merged in argument order, so values from later tags overwrite
# values from earlier ones for the same key.
#
# tags - zero or more tag objects
#
# Returns the merged Hash (empty when no tags are given).
def blend_id3_tags_into_hash(*tags)
  tags.inject({}) do |blended, tag|
    blended.merge(TagWrapper.new(tag).to_h)
  end
end
deep_fetch(from, *keys) click to toggle source
# File lib/parsers/mp3_parser.rb, line 290
# Walks into a nested structure by calling fetch with each key/index in turn.
#
# from - the outermost container (anything responding to fetch)
# keys - the keys or indices to follow, outermost first
#
# Returns the value found at the end of the path.
# Raises InvalidDeepFetch when a step fails with IndexError (which includes
# KeyError) or when an intermediate value does not respond to fetch.
def deep_fetch(from, *keys)
  current = from
  keys.each do |key_or_idx|
    current = current.fetch(key_or_idx)
  end
  current
rescue IndexError, NoMethodError
  raise InvalidDeepFetch, "Could not retrieve #{keys.inspect} from #{from.inspect}"
end
fetch_extra_attributes_from_id3_tags(id3tags_hash) click to toggle source
# File lib/parsers/mp3_parser.rb, line 302
# Picks the title, album and artist out of the blended ID3 tag hash.
#
# Only entries whose value is non-empty once converted with to_s are kept
# (so nil and "" are skipped); kept values are passed through
# FormatParser.string_to_lossy_utf8.
#
# id3tags_hash - a Hash keyed by Symbols such as :title, :album, :artist
#
# Returns a Hash with a subset of the :title, :album and :artist keys, in that order.
def fetch_extra_attributes_from_id3_tags(id3tags_hash)
  %i[title album artist].each_with_object({}) do |attr_name, attrs|
    raw_value = id3tags_hash[attr_name]
    next if raw_value.to_s.empty?

    attrs[attr_name] = FormatParser.string_to_lossy_utf8(raw_value)
  end
end
float_average_over(enum, property) click to toggle source
# File lib/parsers/mp3_parser.rb, line 286
# Computes the arithmetic mean of one property across a collection.
#
# enum     - a collection responding to map and length
# property - the name of the reader to call on every element (converted to a
#            block with &, so typically a Symbol)
#
# Returns the sum of the property values divided by the number of elements
# (taken as a Float).
def float_average_over(enum, property)
  values = enum.map(&property)
  total = values.inject(:+)
  total / Float(enum.length)
end
parse_mpeg_frame_header(offset_in_file, _sync, conf, bitrate_freq, rest) click to toggle source
# File lib/parsers/mp3_parser.rb, line 193
# Decodes the bit fields of a 4-byte MPEG audio frame header into an MPEGFrame.
#
# offset_in_file - the offset of the header, stored on the result as-is
# _sync          - the first header byte (not used for decoding)
# conf           - the second header byte: carries the MPEG version id and the layer id
# bitrate_freq   - the third header byte: carries the bitrate id, sample rate id and padding bit
# rest           - the fourth header byte: carries the channel mode in its top two bits
#
# Returns an MPEGFrame with the offset, version id, channel count, sample rate,
# frame length and bitrate.
# Raises InvalidDeepFetch (via deep_fetch) when the version, layer, sample rate
# or bitrate ids point at a reserved or missing table entry.
def parse_mpeg_frame_header(offset_in_file, _sync, conf, bitrate_freq, rest)
  # The lookup tables below are indexed by the raw ids from the header, see
  # http://www.mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm
  sample_rates_by_version = [
    [11025, 12000, 8000],   # MPEG 2.5
    [],                     # reserved
    [22050, 24000, 16000],  # MPEG 2
    [44100, 48000, 32000],  # MPEG 1
  ]

  mpeg1_layer1 = [0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0]
  mpeg1_layer2 = [0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0]
  mpeg1_layer3 = [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0]
  mpeg2_layer1 = [0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0]
  mpeg2_layer2 = [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0]
  mpeg2_layer3 = mpeg2_layer2

  # Within a version the layers go from 3 to 1 by design, and the first
  # layer id is reserved. MPEG 2 and MPEG 2.5 use the same bitrates.
  mpeg2_bitrates = [nil, mpeg2_layer3, mpeg2_layer2, mpeg2_layer1]
  mpeg1_bitrates = [nil, mpeg1_layer3, mpeg1_layer2, mpeg1_layer1]
  bitrates_by_version_and_layer = [
    mpeg2_bitrates, # MPEG Version 2.5
    nil,            # reserved
    mpeg2_bitrates, # MPEG Version 2
    mpeg1_bitrates, # MPEG Version 1
  ]

  # Second header byte
  mpeg_id = (conf >> 3) & 0x03
  layer_id = (conf >> 1) & 0x03

  # Third header byte
  bitrate_id = (bitrate_freq >> 4) & 0x0F
  sample_rate_id = (bitrate_freq >> 2) & 0x03
  padding = (bitrate_freq >> 1) & 0x01

  # Fourth header byte: modes 00 (stereo), 01 (joint stereo) and 10 (dual channel)
  # all carry two channels, only 11 (single channel) is mono
  channel_mode = (rest >> 6) & 0x03
  channels = channel_mode == 3 ? 1 : 2

  sample_rate = deep_fetch(sample_rates_by_version, mpeg_id, sample_rate_id)
  frame_bitrate = deep_fetch(bitrates_by_version_and_layer, mpeg_id, layer_id, bitrate_id)
  frame_length = (144_000 * frame_bitrate) / sample_rate + padding

  MPEGFrame.new(
    offset_in_file: offset_in_file,
    mpeg_id: mpeg_id,
    channels: channels,
    sample_rate: sample_rate,
    frame_length: frame_length,
    frame_bitrate: frame_bitrate,
  )
end
parse_mpeg_frames(io) click to toggle source

The implementation of the MPEG frames parsing is mostly based on tinytag, a sweet little Python library for parsing audio metadata - do check it out if you have a minute. pypi.python.org/pypi/tinytag

# File lib/parsers/mp3_parser.rb, line 147
# Scans the IO for MPEG frames, starting at its current position.
#
# At most MAX_FRAMES_TO_SCAN read attempts are made; an attempt that does not
# land on a frame header only moves the read position forward and still counts.
# If the very first attempt yields a frame, its body is checked for a Xing
# header, and if that header is usable for computing the duration the scan
# stops right there.
#
# io - an IO-like object responding to read, seek and pos
#
# Returns a two-element Array: [xing_header_or_nil, parsed_frames]. When an
# invalid frame header is hit (InvalidDeepFetch) the frames parsed up to that
# point are returned with a nil Xing header.
def parse_mpeg_frames(io)
  mpeg_frames = []
  bytes_to_read = 4

  MAX_FRAMES_TO_SCAN.times do |frame_i|
    # Read through until we can latch onto the 11 sync bits. Read in 4-byte
    # increments to save on read() calls
    data = io.read(bytes_to_read)

    # If we are at EOF - stop iterating
    break unless data && data.bytesize == bytes_to_read

    # Look for the sync pattern. It can be either the last byte being 0xFF,
    # or any of the 2 bytes in sequence being 0xFF and > 0xE0.
    four_bytes = data.unpack('C4')
    seek_jmp = sync_bytes_offset_in_4_byte_seq(four_bytes)
    if seek_jmp > 0
      # No sync at the start of this chunk - realign the read position on the
      # candidate sync byte (or skip the chunk entirely) and try again
      io.seek(io.pos - bytes_to_read + seek_jmp)
      next
    end

    # Once we are past that stage we have latched onto a sync frame header
    sync, conf, bitrate_freq, rest = four_bytes
    frame_detail = parse_mpeg_frame_header(io.pos - bytes_to_read, sync, conf, bitrate_freq, rest)
    mpeg_frames << frame_detail

    # There might be a xing header in the first frame that contains
    # all the info we need, otherwise parse multiple frames to find the
    # accurate average bitrate
    # NOTE(review): frame_i counts read attempts, not frames, so a first frame
    # found only after some sync searching is never checked for a Xing header -
    # confirm whether that is intended.
    if frame_i == 0
      # Remember where the body starts and return exactly there afterwards:
      # a truncated file yields fewer bytes than frame_length (or nil), so
      # rewinding by frame_length would land on the wrong (possibly negative) offset
      body_start = io.pos
      frame_data_str = io.read(frame_detail.frame_length)
      io.seek(body_start)
      xing_header = frame_data_str && attempt_xing_header(frame_data_str)
      if xing_header_usable_for_duration?(xing_header)
        return [xing_header, mpeg_frames]
      end
    end
    if frame_detail.frame_length > 1 # jump over current frame body
      io.seek(io.pos + frame_detail.frame_length - bytes_to_read)
    end
  end
  [nil, mpeg_frames]
rescue InvalidDeepFetch # A frame was invalid - bail out since it's unlikely we can recover
  [nil, mpeg_frames]
end
sync_bytes_offset_in_4_byte_seq(four_bytes) click to toggle source

Scan 4 byte values and check whether the sync pattern (11 set bits) starts anywhere within them, or whether the last byte is 0xFF, in which case the pattern may continue into the next chunk

# File lib/parsers/mp3_parser.rb, line 245
# Locates a possible MPEG frame sync inside a chunk of four byte values.
#
# four_bytes - an Array of four Integers (byte values)
#
# Returns the index (0..2) of the first 0xFF byte that is directly followed by
# a byte greater than 0xE0. If there is no such pair, returns 3 when the last
# byte is 0xFF (the pattern may continue in the next chunk) and 4 otherwise.
def sync_bytes_offset_in_4_byte_seq(four_bytes)
  sync_at = (0..2).find do |idx|
    four_bytes[idx] == 0xFF && four_bytes[idx + 1] > 0xE0
  end
  return sync_at if sync_at

  four_bytes[-1] == 0xFF ? 3 : 4
end
with_id3tag_local_configs() { || ... } click to toggle source
# File lib/parsers/mp3_parser.rb, line 312
# Runs the given block inside an ID3Tag.local_configuration block in which
# string encoding replaces invalid and undefined characters and the source
# encoding fallback is UTF-8.
#
# Returns whatever ID3Tag.local_configuration returns.
def with_id3tag_local_configs
  lossy_encode_options = { invalid: :replace, undef: :replace }

  ID3Tag.local_configuration do |config|
    config.string_encode_options = lossy_encode_options
    config.source_encoding_fallback = Encoding::UTF_8

    yield
  end
end
xing_header_usable_for_duration?(xing_header) click to toggle source
# File lib/parsers/mp3_parser.rb, line 282
# Tells whether a Xing header carries everything required to use it for the
# duration: the frame count, the byte count and the VBR scale.
#
# xing_header - a header object responding to frames, byte_count and vbr_scale, or nil
#
# Returns a truthy value (the VBR scale) when the header and all three fields
# are present, otherwise the first missing (falsy) value.
def xing_header_usable_for_duration?(xing_header)
  return xing_header unless xing_header

  xing_header.frames && xing_header.byte_count && xing_header.vbr_scale
end