# File: mergeall-products/unzipped/dirdiff.py

#!/usr/bin/python
"""
=================================================================================
Usage:
    [py[thon]] dirdiff.py dir1-path dir2-path
    
Compare two directories to find files that exist in one but not the other.
This version uses the os.listdir function and list difference.  Note that
this script checks only filenames, not file contents--see diffall.py for an
extension that does the latter by comparing .read() results.

New Sep-2016: changed difference labels slightly, so users can search the
report for uppercase '*UNIQUE' and '*DIFFERS' to inspect differences quickly.

New Mar-2017: use FWP() to fix long path names on Windows, but don't change
user message in the process (else could minimize number of calls).

New Dec-2021, [3.3]: normalize Unicode in filenames for script-mode use.
When used as a module, callers are expected to normalize names instead.

New Dec-2021, [3.3]: moved intersect() to this file from diffall.py, both for 
cohesion, and to break a mergeall<==>diffall cyclic import (but the latter was
made moot when the importee moved to fixunicodedups.py).  Also spruced up docs.
=================================================================================
"""

from __future__ import print_function         # ADDED: 2.X compatibility

import os, sys

# [3.0] fix too-long paths on Windows 
from fixlongpaths import FWP

# [3.3] normalize Unicode for comparisons
from fixunicodedups import normalizeUnicode



def reportdiffs(unique1, unique2, dir1, dir2):
    """
    ---------------------------------------------------------------------------
    Print the differences report for one directory pair (used by comparedirs).
    unique1/unique2 are the names found only in dir1/dir2, respectively;
    dir1/dir2 are used solely to label the report sections.
    Prints a single "identical" line when both unique lists are empty.
    ---------------------------------------------------------------------------
    """
    if not unique1 and not unique2:
        print('Directory lists are identical')
        return

    # same section layout for each side: header line, then one line per name
    for (dirname, uniques) in ((dir1, unique1), (dir2, unique2)):
        if uniques:
            print('*UNIQUE items in %s:' % dirname)
            for name in uniques:
                print('...', name)



def intersect(seq1, seq2):
    """
    ---------------------------------------------------------------------------
    Return all items in both seq1 and seq2, as a list in seq1's order.
    Duplicates in seq1 are retained.  A set(seq1) & set(seq2) would work
    too, but sets are randomly ordered, so any platform-dependent directory
    order would be lost; a set is used here only for membership tests on
    seq2, so the result still follows seq1's order.
    If any item is unhashable, falls back on linear scans of seq2.
    [3.3] Assumes seq1/seq2 reflect Unicode normalization if needed.
    [3.3] Moved here from diffall.py for cohesion; not used if script.
    ---------------------------------------------------------------------------
    """
    try:
        members = set(seq2)                       # built once: fast lookups
        return [item for item in seq1 if item in members]
    except TypeError:
        # unhashable item in either sequence: use plain sequence scans
        return [item for item in seq1 if item in seq2]



def difference(seq1, seq2):
    """
    ---------------------------------------------------------------------------
    Return all items in seq1 only, as a list in seq1's order.
    Duplicates in seq1 are retained.  A set(seq1) - set(seq2) would work
    too, but sets are randomly ordered, so any platform-dependent directory
    order would be lost; a set is used here only for membership tests on
    seq2, so the result still follows seq1's order.
    If any item is unhashable, falls back on linear scans of seq2.
    [3.3] Assumes seq1/seq2 reflect Unicode normalization if needed.
    ---------------------------------------------------------------------------
    """
    try:
        members = set(seq2)                       # built once: fast lookups
        return [item for item in seq1 if item not in members]
    except TypeError:
        # unhashable item in either sequence: use plain sequence scans
        return [item for item in seq1 if item not in seq2]



def comparedirs(dir1, dir2, files1=None, files2=None):
    """
    ---------------------------------------------------------------------------
    Compare the name lists of two directories; file contents are not read.
    Prints a header line plus the reportdiffs() report, and returns True
    if neither directory has names missing from the other, else False.
    May need bytes listdir arg for undecodable filenames on some platforms.
    [3.3] A files list left as None is loaded with os.listdir (via FWP) and
    Unicode-normalized here; a list passed in is used as is, so the caller
    must normalize it.
    ---------------------------------------------------------------------------
    """
    print('Comparing', dir1, 'to', dir2)

    # load and normalize only the listings the caller did not supply
    if files1 is None:
        files1 = [normalizeUnicode(name) for name in os.listdir(FWP(dir1))]
    if files2 is None:
        files2 = [normalizeUnicode(name) for name in os.listdir(FWP(dir2))]

    only1 = difference(files1, files2)
    only2 = difference(files2, files1)
    reportdiffs(only1, only2, dir1, dir2)

    if only1 or only2:
        return False                              # some name is unique
    return True                                   # true if no diffs



def getargs():
    """
    ---------------------------------------------------------------------------
    Args for command-line mode.
    Returns a (dir1, dir2) tuple taken from the two command-line arguments.
    If the argument count is not exactly two, prints a usage line and ends
    the program with exit status 1 (sys.exit raises SystemExit).
    ---------------------------------------------------------------------------
    """
    try:
        dir1, dir2 = sys.argv[1:]                 # 2 command-line args
    except ValueError:
        # unpacking fails only for a wrong count; other errors propagate
        print('Usage: [py[thon]] dirdiff.py dir1 dir2')
        sys.exit(1)
    return (dir1, dir2)



if __name__ == '__main__':
    # -------------------------------------------------------------------------
    # Main script-execution logic (not when imported): fetch the two
    # directory paths from the command line and compare their name lists.
    # -------------------------------------------------------------------------
    comparedirs(*getargs())



# [Home page] Books Code Blog Python Author Train Find ©M.Lutz