How to access iMessage chat database using sqlite3 and python


Today I got my first GitHub star!!!

The repo is iExport which is an innocent piece of Python to extract the contents of a OS X user's chat.db, conveniently contained out-of-sight in ~/Library/Messages/chat.db and accessible via sqlite3 chat.db.

I've annotated the script below to show you how it's done.

#!/usr/bin/env python3

import base64
import mimetypes
import cgi
import sqlite3
from os import path

 # this is the path where iMessage app stores the contents locally
CHAT_DB = path.expanduser("~/Library/Messages/chat.db")

 # unix epoch used in query
EPOCH = 978307200

# this is just some basic formatted html that colorizes the chats
# the messages and images are interpolated into divs below
print("""
<!doctype html>
<html>
<head>
<meta charset=\"utf-8\">
<style>
  body { margin: 0; padding: 0; }
  .message {
      white-space: pre-wrap;
      max-width: 800px;
      padding: 10px;
      margin: 10px;
  }
  .me { background-color: #A6DBFF; }
  .buddy { background-color: #EEE; }
  .message img { max-width: 800px; }
</style>
</head>
<body>
""")

# to see the chats you have, use this util
# def list_chats():
#     db = sqlite3.connect(CHAT_DB)
#     cursor = db.cursor()
#     rows = cursor.execute("""
#         SELECT chat_identifier
#           FROM chat;
#     """)
#     for row in rows:
#         export(row[0])

def export_all():
    db = sqlite3.connect(CHAT_DB)
    cursor = db.cursor()
    rows = cursor.execute("""
        SELECT chat_identifier
          FROM chat;
    """)
    for row in rows:
        export(row[0])

# main query—get text, timestamp, and images from messages
# iterate over these rows, and base64 encode attachments
# in order to display images then just interpolate them like so:
# line = "<div class=\"message from-%s\" title=\"%s\">%s</div>" % (who, date, text)
# note: remove LIMIT 1 to get all messages
def export(chat_id):
    db = sqlite3.connect(CHAT_DB)
    cursor = db.cursor()
    rows = cursor.execute("""
          SELECT datetime(m.date + ?, 'unixepoch', 'localtime') as fmtdate,
                 m.is_from_me,
                 m.text,
                 a.filename
            FROM chat as c
      INNER JOIN chat_message_join AS cm
              ON cm.chat_id = c.ROWID
      INNER JOIN message AS m
                 ON m.ROWID = cm.message_id
       LEFT JOIN message_attachment_join AS ma
              ON ma.message_id = m.ROWID
       LEFT JOIN attachment as a
              ON a.ROWID = ma.attachment_id
           WHERE c.chat_identifier = ?
        ORDER BY m.date
           LIMIT 1;
    """, (EPOCH, chat_id))

    for row in rows:
        date = row[0]
        who = "me" if row[1] is 1 else "contact"
        if row[3]:
            attachment = path.expanduser(row[3])
            media_type = mimetypes.guess_type(attachment)[0]
            try:
                with open(attachment, "rb") as image:
                    encoded_data = base64.b64encode(image.read())
            except:
                encoded_data = ""
            text = "<img src=\"data:%s;base64,%s\">" % (
                media_type, encoded_data)
            text = "<img src=\"%s\">" % (attachment)
        else:
            text = cgi.escape(row[2] or '')
        line = "<div class=\"message from-%s\" title=\"%s\">%s</div>" % (who, date, text)
        print(line.encode("utf8"))

# crude way to construct the document, I know :~)
# you'll want to pipe the output
# python ixport.py > imessages.html
# and then pray that your browser is prepared
# I would suggest no scrolling until all has loaded
# or opening a PR to paginate the data from chunks
print("""
</body>
</html>
""")

export_all()