File: imapfetch/imapfetch.py

#!/usr/bin/python3
"""
==============================================================================
imapfetch.py - fetch and save emails from server folders via IMAP.

Author:   copyright M. Lutz, Dec-4-2015.
License:  provided freely, but with no warranties of any kind.
Usage:    change configurations below, run with no command-line arguments;
          view saved mailbox files with PyMailGUI's "Open" (or other scheme);
          requires Python 3.X, tested and used only on 3.3 and 3.5 so far;
Examples: see HOW.txt for script usage, runlogs/ for script outputs,
          and SavedMailboxes-yourid-yourisp.net/ for save-mail files and GUI;

This script was originally used to download ~20k saved mails spanning more
than a decade, before closing a former ISP email account (a one-time download
that took over 5 hours for a slow ISP).  Configure to use your own account's
parameters using the upper-case settings at the start of the code below.

Fetches all messages from all (or selected) mail folders at an email account
host, using the IMAP interface.  POP provides access to the inbox only; IMAP
is more complex and not universally supported, but also gives access to all
other saved-mail folders at the host (e.g., sent mails, drafts, etc.).

Creates one file per host folder, each of which contains the full-text of
all the folder's messages separated by marker lines.  These files are
designed to be viewed via the "Open" button of the PyMailGUI client, coded
in the book "Programming Python, 4th Edition", and available standalone at
website "http://learning-python.com/pymailgui".  For more on using PyMailGUI,
see SavedMailboxes-yourid-yourisp.net/README.txt.  To use this script for
other clients and use cases, change its code or separator line as desired.

Uses bytes for both file names and contents: encodings unknown and variable.
The imaplib API returns structured data; reply formats are documented in
"e.g.," comments, but see imaplib and IMAP docs for the 'magic' indexes here.
==============================================================================
"""
import imaplib, getpass, sys, os


#
# configuration settings: change for your accounts (see HOW.txt for examples)
#
Ask  = False     # True = prompt for host/user/password at startup
PORT = 143       # port number passed to imaplib.IMAP4 at connect time

if Ask:
    # interactive: ask at the console, password input is not echoed
    HOST = input('Host name? ')
    USER = input('User name? ' )
    PSWD = getpass.getpass('Password? ' )
else:
    # non-interactive: use the hard-coded account values
    HOST = 'imap.yourisp.net'
    USER = 'yourid@yourisp.net'
    PSWD = 'yourpassword'

# folder for the saved mailbox files: relative to '.', or an absolute path
SAVEDIR = b'SavedMailboxes-yourid-yourisp.net'

# when non-empty, these mailboxes (named by save-file name) are not fetched
MBOXSKIP = [
    b'Virus-Blocker',
    b'Sent-Spam',
    b'Known-spam',
    b'Suspect-Email',
    b'Trash',
]

# when non-empty, only these mailboxes (named by save-file name) are fetched;
# e.g., [b'pp3e-errata', b'lp3e-errata', b'Old-Sent']
MBOXSAVE = []


#
# open save-mail files with PyMailGUI's "Open" (see top docstring)
#
try:
    # reuse PyMailGUI's own separator line when the PP4E package is importable
    from PP4E.Internet.Email.PyMailGui.sharednames import saveMailSeparator
except Exception:
    # package missing or failed to load: fall back on a built-in separator;
    # catching Exception (not a bare except) lets ctrl-c and sys.exit pass
    saveMailSeparator = 'PyMailGUI' + ('-'*60) + 'PyMailGUI\r\n'

# save files are written in binary mode, so the separator must be bytes
saveMailSeparator = saveMailSeparator.encode()



def getMailboxNames(server):
    """
    --------------------------------------------------------------
    extract mailbox (a.k.a. folder) names from server;
    save xor skip folders per configuration list settings, if any:
    when MBOXSAVE is non-empty only its names are kept and MBOXSKIP
    is ignored, else names listed in MBOXSKIP are dropped;
    fix double-quoted names having embedded spaces: retain quotes
    for server.select, drop quotes and blanks for save-file name;

    server:  object whose list() returns (status, [entry, ...]),
             such as a logged-in imaplib.IMAP4;
    returns: list of (mboxname, savename) bytes pairs, in server
             order; empty if the listing fails or has no entries;
    --------------------------------------------------------------
    """
    # e.g., ('OK', [b'...', b'...', ...])
    resp, mboxes = server.list()
    if resp != 'OK':
        # a failed listing carries error text, not folder names: don't parse it
        print('Error listing mailboxes:', resp, mboxes)
        return []

    mboxnames = []
    for mboxinfo in mboxes:
        if not isinstance(mboxinfo, bytes) or not mboxinfo.split():
            # imaplib yields [None] for an empty listing, and non-bytes items
            # for replies it could not flatten: nothing to split here
            if mboxinfo:
                print('Skipping unrecognized list entry:', mboxinfo)
            continue

        # e.g., b'(..) "." mboxname' => (b'mboxname', b'mboxname')
        mboxname = mboxinfo.split()[-1]
        savename = mboxname

        if b'"' in mboxname:
            # e.g., b'(..) "." "mbox name"' => (b'"mbox name"', b'mbox-name')
            unquoted = mboxinfo.split(b'"')[-2]
            mboxname = b'"' + unquoted + b'"'
            savename = unquoted.replace(b' ', b'-')

        if MBOXSAVE:
            keep = savename in MBOXSAVE                 # save these only
        else:
            keep = savename not in MBOXSKIP             # or skip these only

        if keep:
            mboxnames.append((mboxname, savename))
        else:
            print('Skipping mailbox:', mboxname)

    return mboxnames



def _safeFileName(savename):
    """
    replace path separators in a bytes save-file name with b'-', so
    the file is always created directly inside SAVEDIR;
    """
    for sep in (os.sep, os.altsep):
        if sep:
            savename = savename.replace(os.fsencode(sep), b'-')
    return savename



def fetchAndSaveMessages(server, mboxnames):
    """
    ------------------------------------------------------------------
    fetch and save mailboxes of messages, one flat file per mailbox;
    use bytes name+file for outputs: no encoding for name or content;

    server:    object with imaplib.IMAP4-style select() and fetch();
    mboxnames: iterable of (mboxname, savename) bytes pairs: mboxname
               is passed to select, savename names the output file;
    returns:   None; each mailbox is written to a file in SAVEDIR
               named savename + b'.eml.txt', with each message
               preceded by saveMailSeparator and followed by b'\\n';

    an error in one mailbox is printed and the next one is tried;
    the mailbox is selected before its file is opened, so a failed
    select never truncates an earlier save file, and the file is
    closed even when a fetch fails part way through;
    ------------------------------------------------------------------
    """
    for (mboxname, savename) in mboxnames:
        print('\nDownloading:', mboxname, 'to', savename, '...', flush=True)

        try:
            # e.g., ('OK', [b'62'])
            # NOTE: select() opens the mailbox read-write, so RFC822 fetches
            # flag messages as \Seen on the server; pass readonly=True to
            # select() if the server-side flags must stay untouched
            resp, nmsg = server.select(mailbox=mboxname)
            if resp != 'OK':
                raise RuntimeError('select failed: %r %r' % (resp, nmsg))
            nummessages = int(nmsg[0])

            # use bytes for name and content
            savepath = os.path.join(SAVEDIR, _safeFileName(savename)) + b'.eml.txt'

            # fetch/save mailbox's messages as bytes: encoding unknown
            with open(savepath, 'wb') as savefile:
                for mnum in range(1, nummessages + 1):
                    if mnum % 12 == 0: print(flush=True)
                    print(mnum, end=' ')
                    # e.g., ('OK', [(b'1 (RFC822 {2020}', b'Return-Path:...'), b')'])
                    resp, data = server.fetch(str(mnum), 'RFC822')
                    if resp != 'OK':
                        raise RuntimeError('fetch failed: %r %r' % (resp, data))
                    # extract the text first: no dangling separator on bad replies
                    fulltext = data[0][1]
                    savefile.write(saveMailSeparator)
                    savefile.write(fulltext + b'\n')
            print()

        except Exception as excobj:
            # many things can fail, but let ctrl-c pass
            print('\nError while downloading', mboxname)
            print(type(excobj), excobj)  # same as sys.exc_info()[0]/[1]
            print('Continuing\n')



if __name__ == '__main__':
    # make savedir if needed, plus any missing parent folders of a nested path
    if not os.path.isdir(SAVEDIR):
        os.makedirs(SAVEDIR)

    # connect to server
    server = imaplib.IMAP4(host=HOST, port=PORT)
    try:
        server.login(user=USER, password=PSWD)

        # fetch and save emails
        mboxnames = getMailboxNames(server)
        fetchAndSaveMessages(server, mboxnames)
    finally:
        # always end the IMAP session, even if login or a download failed
        server.logout()

    # wrap-up
    print('\nFinished: see mail files in', SAVEDIR)               # don't decode here either
    if sys.platform.startswith('win') and sys.stdout.isatty():    # for Windows icon clickers,
        input('Press Enter to close')                             # unless output piped to file



[Home page] Books Code Blog Python Author Train Find ©M.Lutz