module MagicBytes

Constants

FileType

Describes a file type with its file extension and MIME type

HEADER_SIZE

The maximum length supported (needed for .tar archives)

ReadError

Gets raised when the file being read from is at EOF or empty (when read() from the file returns `nil`)

VERSION

Public Instance Methods

detect(header_bytes) click to toggle source

This is a line-for-line port of github.com/sindresorhus/file-type which is more than sufficient for our purposes

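@param header_bytes [String] the header bytes of the file @return [FileType] the detected type, carrying the file extension and MIME type; content that matches no known signature is reported as ext 'bin' with mime 'binary/octet-stream' (nil is never returned) @raise [ReadError] if header_bytes is nil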

# File lib/magic_bytes.rb, line 31
# Identifies a file type from the leading bytes of a file.
#
# @param header_bytes [String] the first bytes of the file
# @return [FileType] built from the :ext and :mime entries that _detect returns
# @raise [ReadError] when header_bytes is nil (or false)
def detect(header_bytes)
  raise ReadError unless header_bytes

  signature = _detect(header_bytes)
  # fetch (not []) so a malformed result fails loudly with KeyError
  ext, mime = [:ext, :mime].map { |key| signature.fetch(key) }
  FileType.new(ext, mime)
end
read_and_detect(io) click to toggle source

Performs detection from a given IO or File.

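@param io a readable object (anything that responds to read(length)) @return [FileType] the detected type, carrying the file extension and MIME type; content that matches no known signature is reported as ext 'bin' with mime 'binary/octet-stream' (nil is never returned) @raise [ReadError] if reading from io returns nil (the source is empty or at EOF)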

# File lib/magic_bytes.rb, line 20
# Reads up to HEADER_SIZE bytes from +io+ and runs detection on them.
#
# @param io a readable object (anything responding to read(length))
# @return whatever detect returns for the bytes that were read
# @raise [ReadError] when io.read returns nil (nothing left to read)
def read_and_detect(io)
  header = io.read(HEADER_SIZE)
  return detect(header) if header

  raise ReadError
end

Private Instance Methods

_detect(header_bytes) click to toggle source
# File lib/magic_bytes.rb, line 39
# Matches the leading bytes of a file against a fixed, ordered list of known
# signatures ("magic bytes") and reports the first one that fits.
#
# The order of the checks is significant: the first match wins, so formats
# that share a prefix with a more generic one (cr2/tif, epub/xpi/zip,
# mkv/webm, opus/ogg, deb/ar) must be tested before it.
#
# A header that is too short for a given signature simply does not match it.
#
# @param header_bytes [String] the leading bytes of the file
# @return [Hash] a hash with :ext and :mime keys; content that matches nothing
#   yields {ext: 'bin', mime: 'binary/octet-stream'}
def _detect(header_bytes)
  buf = header_bytes.unpack("C*")

  # True when the bytes starting at `offset` equal `pattern`. `pattern` is
  # either an Array of byte values or an ASCII String (compared bytewise).
  # Slicing past the end of `buf` gives nil or a shorter array, neither of
  # which equals the pattern, so truncated headers never match.
  at = lambda do |offset, pattern|
    expected = pattern.is_a?(String) ? pattern.unpack("C*") : pattern
    buf[offset, expected.length] == expected
  end

  # Prefixes shared by several signatures below.
  tiff_header = at.(0, [0x49, 0x49, 0x2A, 0x00]) || at.(0, [0x4D, 0x4D, 0x00, 0x2A])
  zip_local_header = at.(0, [0x50, 0x4B, 0x03, 0x04])
  ftyp = at.(4, 'ftyp')
  ftyp_1c = at.(0, [0x00, 0x00, 0x00, 0x1C]) && ftyp
  riff = at.(0, 'RIFF')
  sfnt_flavor = at.(4, [0x00, 0x01, 0x00, 0x00]) || at.(4, 'OTTO')

  return {ext: 'jpg', mime: 'image/jpeg'} if at.(0, [0xFF, 0xD8, 0xFF])
  return {ext: 'png', mime: 'image/png'} if at.(0, [0x89, 0x50, 0x4E, 0x47])
  return {ext: 'gif', mime: 'image/gif'} if at.(0, 'GIF')
  return {ext: 'webp', mime: 'image/webp'} if at.(8, 'WEBP')

  # needs to be before `tif`
  return {ext: 'cr2', mime: 'image/x-canon-cr2'} if tiff_header && at.(8, 'CR')
  return {ext: 'tif', mime: 'image/tiff'} if tiff_header
  return {ext: 'bmp', mime: 'image/bmp'} if at.(0, 'BM')
  return {ext: 'jxr', mime: 'image/vnd.ms-photo'} if at.(0, [0x49, 0x49, 0xBC])
  return {ext: 'psd', mime: 'image/vnd.adobe.photoshop'} if at.(0, '8BPS')

  # needs to be before `zip`
  return {ext: 'epub', mime: 'application/epub+zip'} if zip_local_header && at.(30, 'mimetypeapplication/epub+zip')

  # needs to be before `zip`
  # assumes signed.xpi from addons.mozilla.org
  return {ext: 'xpi', mime: 'application/x-xpinstall'} if zip_local_header && at.(30, 'META-INF/mozilla.rsa')

  if at.(0, 'PK') && [0x3, 0x5, 0x7].include?(buf[2]) && [0x4, 0x6, 0x8].include?(buf[3])
    return {ext: 'zip', mime: 'application/zip'}
  end

  return {ext: 'tar', mime: 'application/x-tar'} if at.(257, 'ustar')

  if at.(0, [0x52, 0x61, 0x72, 0x21, 0x1A, 0x07]) && [0x0, 0x1].include?(buf[6])
    return {ext: 'rar', mime: 'application/x-rar-compressed'}
  end

  return {ext: 'gz', mime: 'application/gzip'} if at.(0, [0x1F, 0x8B, 0x08])
  return {ext: 'bz2', mime: 'application/x-bzip2'} if at.(0, 'BZh')
  return {ext: '7z', mime: 'application/x-7z-compressed'} if at.(0, [0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C])
  return {ext: 'dmg', mime: 'application/x-apple-diskimage'} if at.(0, [0x78, 0x01])

  mp4 = (at.(0, [0x00, 0x00, 0x00]) && [0x18, 0x20].include?(buf[3]) && ftyp) ||
        at.(0, '3gp5') ||
        (ftyp_1c && at.(8, 'mp42') && at.(16, 'mp41mp42isom')) ||
        (ftyp_1c && at.(8, 'isom')) ||
        (ftyp_1c && at.(8, 'mp42') && at.(12, [0x00, 0x00, 0x00, 0x00]))
  return {ext: 'mp4', mime: 'video/mp4'} if mp4

  return {ext: 'm4v', mime: 'video/x-m4v'} if ftyp_1c && at.(8, 'M4V')
  return {ext: 'mid', mime: 'audio/midi'} if at.(0, 'MThd')

  # needs to be before the `webm`
  return {ext: 'mkv', mime: 'video/x-matroska'} if at.(31, 'matroska')
  return {ext: 'webm', mime: 'video/webm'} if at.(0, [0x1A, 0x45, 0xDF, 0xA3])
  return {ext: 'mov', mime: 'video/quicktime'} if at.(0, [0x00, 0x00, 0x00, 0x14]) && ftyp
  return {ext: 'avi', mime: 'video/x-msvideo'} if riff && at.(8, 'AVI')

  if at.(0, [0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9])
    return {ext: 'wmv', mime: 'video/x-ms-wmv'}
  end

  # MPEG start code 00 00 01 followed by a 0xB0..0xBF code byte. The upstream
  # JS tests that the first hex digit of buf[3] is 'b'; accepting only 0xBA
  # (pack header) missed video elementary streams, which begin with 0xB3.
  return {ext: 'mpg', mime: 'video/mpeg'} if at.(0, [0x00, 0x00, 0x01]) && buf[3] && (buf[3] >> 4) == 0xB

  return {ext: 'mp3', mime: 'audio/mpeg'} if at.(0, 'ID3') || at.(0, [0xFF, 0xFB])
  return {ext: 'm4a', mime: 'audio/m4a'} if at.(4, 'ftypM4A') || at.(0, 'M4A ')

  # needs to be before `ogg`
  return {ext: 'opus', mime: 'audio/opus'} if at.(28, 'OpusHead')
  return {ext: 'ogg', mime: 'audio/ogg'} if at.(0, 'OggS')
  return {ext: 'flac', mime: 'audio/x-flac'} if at.(0, 'fLaC')
  return {ext: 'wav', mime: 'audio/x-wav'} if riff && at.(8, 'WAVE')
  return {ext: 'amr', mime: 'audio/amr'} if at.(0, "#!AMR\n")
  return {ext: 'pdf', mime: 'application/pdf'} if at.(0, '%PDF')
  return {ext: 'exe', mime: 'application/x-msdownload'} if at.(0, 'MZ')

  # "CWS" (compressed) or "FWS" (uncompressed)
  return {ext: 'swf', mime: 'application/x-shockwave-flash'} if [0x43, 0x46].include?(buf[0]) && at.(1, 'WS')

  return {ext: 'rtf', mime: 'application/rtf'} if at.(0, '{\\rtf')
  return {ext: 'woff', mime: 'application/font-woff'} if at.(0, 'wOFF') && sfnt_flavor
  return {ext: 'woff2', mime: 'application/font-woff'} if at.(0, 'wOF2') && sfnt_flavor

  eot_version = at.(8, [0x00, 0x00, 0x01]) || at.(8, [0x01, 0x00, 0x02]) || at.(8, [0x02, 0x00, 0x02])
  return {ext: 'eot', mime: 'application/octet-stream'} if at.(34, 'LP') && eot_version

  return {ext: 'ttf', mime: 'application/font-sfnt'} if at.(0, [0x00, 0x01, 0x00, 0x00, 0x00])
  return {ext: 'otf', mime: 'application/font-sfnt'} if at.(0, [0x4F, 0x54, 0x54, 0x4F, 0x00])
  return {ext: 'ico', mime: 'image/x-icon'} if at.(0, [0x00, 0x00, 0x01, 0x00])
  return {ext: 'flv', mime: 'video/x-flv'} if at.(0, [0x46, 0x4C, 0x56, 0x01])
  return {ext: 'ps', mime: 'application/postscript'} if at.(0, '%!')
  return {ext: 'xz', mime: 'application/x-xz'} if at.(0, [0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00])
  return {ext: 'sqlite', mime: 'application/x-sqlite3'} if at.(0, 'SQLi')
  return {ext: 'nes', mime: 'application/x-nintendo-nes-rom'} if at.(0, [0x4E, 0x45, 0x53, 0x1A])
  return {ext: 'crx', mime: 'application/x-google-chrome-extension'} if at.(0, 'Cr24')
  return {ext: 'cab', mime: 'application/vnd.ms-cab-compressed'} if at.(0, 'MSCF') || at.(0, 'ISc(')

  # needs to be before `ar`
  return {ext: 'deb', mime: 'application/x-deb'} if at.(0, "!<arch>\ndebian-binary")
  return {ext: 'ar', mime: 'application/x-unix-archive'} if at.(0, '!<arch>')
  return {ext: 'rpm', mime: 'application/x-rpm'} if at.(0, [0xED, 0xAB, 0xEE, 0xDB])
  return {ext: 'Z', mime: 'application/x-compress'} if at.(0, [0x1F, 0xA0]) || at.(0, [0x1F, 0x9D])
  return {ext: 'lz', mime: 'application/x-lzip'} if at.(0, 'LZIP')

  if at.(0, [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1])
    return {ext: 'msi', mime: 'application/x-msi'}
  end

  # Nothing matched: report generic binary data.
  {ext: 'bin', mime: 'binary/octet-stream'}
end