Take care of the following placeholders in the code below:
IMAP_HOSTNAME: the address of the server; the API uses port 993 and SSL by default.
IMAP_USERNAME: the username for the connection.
IMAP_PASSWORD: the password for the connection.
IMAP_FOLDER: the folder to be processed (please note the read-only flag in the code below).
local_filename: when storing attachments locally, you need to generate a safe path.
from imapclient import IMAPClient
import ssl
import email
import email.header
from email import policy
# Example: connect to an IMAP server, list every message of one folder and
# store all parts that declare a file name.
# Placeholders expected to be defined by the reader: IMAP_HOSTNAME,
# IMAP_USERNAME, IMAP_PASSWORD, IMAP_FOLDER and local_filename (see below).
ssl_context = ssl.create_default_context()
with IMAPClient(IMAP_HOSTNAME, ssl_context=ssl_context) as server:
    server.login(IMAP_USERNAME, IMAP_PASSWORD)
    # show capabilities of the IMAP server:
    #print(server.capabilities())
    # select a folder for processing; readonly=True opens the folder read-only
    select_info = server.select_folder(IMAP_FOLDER, readonly=True)
    # get a list of all messages (adapt as needed)
    total_messages = select_info[b'EXISTS']
    messages = server.search()
    # you can fetch all messages at once or a subset of them based on a filter
    # in this case, messages will be fetched and processed one-by-one
    for msg in messages:
        # fetch the message, normally only one message should be returned here
        fetched = server.fetch(msg, ['RFC822', 'FLAGS', 'INTERNALDATE', 'UID'])
        for uid, message_data in fetched.items():
            # parse the message, missing policy falls back to compat32 api
            # the new API takes care of handling basic types and decoding strings (e.g. UTF-8 headers)
            email_message = email.message_from_bytes(message_data[b'RFC822'], policy=policy.default)
            print(' - SEQ', message_data[b'SEQ'])
            print(' - FLAGS', message_data[b'FLAGS'])
            # the internal date is the timestamp, when the message arrived at the server (and it is assigned by the server)
            # it can be used for forensic work in order to find some mismatches
            internaldate_raw = message_data[b'INTERNALDATE']
            internaldate_ts = internaldate_raw.timestamp()
            print(' - INTERNALDATE', internaldate_raw, internaldate_ts)
            print(' - From:', email_message.get('From'))
            print(' - Subject:', email_message.get('Subject'))
            # The Date header may be absent (get() returns None) or unparsable
            # (its .datetime is then None); print None instead of crashing.
            date_header = email_message.get('Date')
            date_dt = getattr(date_header, 'datetime', None)
            date_ts = date_dt.timestamp() if date_dt is not None else None
            print(' - Date:', date_header, date_ts)
            print(' - Message-ID:', email_message.get('Message-ID'))
            print(' - In-Reply-To:', email_message.get('In-Reply-To'))
            # iterate over parts, e.g. to find relevant attachments
            # see documentation: there are convenience methods for iterating over specific parts only
            for part in email_message.walk():
                filename = part.get_filename()
                print(' - part', part.get_content_type(), 'filename', filename)
                # most of the real attachments have a declared file name (but this is not a 100% rule!)
                if filename is None:
                    continue
                # placeholder: derive a safe local path here; never use the
                # declared file name unchecked, it comes from the sender
                local_filename = ...
                # the API delivers one of: bytes, EmailMessage or str, depending on the Content-Type
                content = part.get_content()
                if isinstance(content, email.message.EmailMessage):
                    content = content.as_bytes()
                elif isinstance(content, str):
                    charset = part.get_content_charset()
                    print(' - charset', charset)
                    content = content.encode(charset or 'utf-8')
                # save the content to a file
                with open(local_filename, 'wb') as f:
                    f.write(content)
In case of using the legacy API, a few things work differently. The same is valid for the MBOX approach below.
Encoded headers such as =?utf-8?Q?hello?= are not decoded automatically. Manual decoding could be arranged as follows:
list of (subject, subject_encoding) = email.header.decode_header(email_message.get('Subject'))
subject = subject.decode(subject_encoding or 'utf-8')
The decoded content of a part is retrieved with part.get_payload(None, True).
If you don't need live interaction with the application (see: the plugin approach via the WebExtension API, or the "Hello World" Extension Tutorial), there is an extremely easy approach: access the files directly. E-mails are stored in a simple MBOX format.
MBOX_FILE: physical location of the needed MBOX file.
import mailbox
import datetime
# Example: read an MBOX file directly, print the main headers of every
# message and store all parts that declare a file name.
# Placeholders expected to be defined by the reader: MBOX_FILE and
# local_filename (see below).
# parsedate_to_datetime understands RFC 5322 dates including trailing
# comments such as "(CEST)", so no parsing of exception texts is needed.
from email.utils import parsedate_to_datetime

mb = mailbox.mbox(MBOX_FILE, create=False)
for email_message in mb:
    print(' - From:', email_message.get('From'))
    print(' - Subject:', email_message.get('Subject'))
    date_header = email_message.get('Date')
    print(' - Date:', date_header)
    # the legacy API returns the raw header value, so the Date header has to
    # be parsed manually; 0.0 is kept when the message has no Date header
    date_ts = 0.0
    if date_header is not None:
        # an unparsable value still raises, you need to adapt the code to your situation
        date_dt = parsedate_to_datetime(str(date_header))
        if date_dt.tzinfo is None:
            # a "-0000" zone yields a naive datetime; treat it as UTC instead
            # of letting timestamp() assume the local time zone
            date_dt = date_dt.replace(tzinfo=datetime.timezone.utc)
        date_ts = date_dt.timestamp()
    print(' - Date:', date_ts)
    print(' - Message-ID:', email_message.get('Message-ID'))
    print(' - In-Reply-To:', email_message.get('In-Reply-To'))
    #print(' - Received:', email_message.get('Received'), type(email_message.get('Received')))
    # same as in the IMAP example above
    for part in email_message.walk():
        filename = part.get_filename()
        print(' - part', part.get_content_type(), 'filename', filename)
        if filename is None:
            continue
        # placeholder: derive a safe local path here; never use the declared
        # file name unchecked, it comes from the sender
        local_filename = ...
        # mailbox generates mailbox.mboxMessage objects not EmailMessage objects
        # (legacy compat32 API), hence get_payload() instead of get_content()
        content = part.get_payload(None, True)
        if content is None:
            # decode=True returns None for multipart containers, e.g. an
            # attached message/rfc822: serialise the contained messages
            subparts = part.get_payload()
            if isinstance(subparts, list):
                content = b''.join(sub.as_bytes() for sub in subparts)
            else:
                content = b''
        elif isinstance(content, str):  # defensive, bytes are expected here
            charset = part.get_content_charset()
            print(' - charset', charset)
            content = content.encode(charset or 'utf-8')
        with open(local_filename, 'wb') as f:
            f.write(content)
Next: SMS TPDU Transfer Protocol Data Unit