"""
File: mergeall-products/unzipped/test/test-path-normalization-3.3/test-path-normalization-walks/

[3.3] Create paths with mixed decomposed and composed Unicode
filenames, and test compares, syncs, and deltas in Mergeall 3.3.

Run in this script's folder with no command-line arguments; 
test folders are auto created in '.' (the CWD).  This script
file's content is the same in all platform subfolders.

Results for each platform are in the platform subfolders' 
_TEST-output.txt (console) and LOGS/* (run steps).  The most 
important output may be in LOGS/*deltas-apply*.txt for each 
platform, as these files show path normalization in action. 

Coding: an Oct-22 takeoff on ../../test-normalization-3.3's 
test script.  Coded here portably to run on Unix (macOS), 
Windows, Linux, and Android (Termux).  Uses manual decoding, 
else open('name') may depend on platform policies, cut/paste,
and keyboard input for Unicode text.

Test data: this fudges the skewed Unicode-variant state that path
normalization fixes, by renaming TO's path components after a 
unique TO path has been saved in __added__.txt by a deltas create.  

How Tested:

Results here vary with the debug1/debug2 switches in Mergeall's
fixunicodedups.matchUnicodePathnames().  See that function's 
docstring for switch usage.  This reflects platform policies:

- Because macOS and Android (shared and app-specific storage) 
  auto-normalize paths with Unicode variants to match, they will
  happily sync the skewed test data here without any manual 
  normalization.  Set debug1 to True to force a normalization 
  loop (and output "--Path okay" messages), or set both switches
  to True to force the loop and component normalization (and 
  output "--Unicode morphed" messages).

- Because Windows, Linux, and Android (app-private storage) do not
  auto-normalize paths, they will run a manual normalization loop
  for the test data here, without setting Mergeall's switches to 
  True.  Windows and Linux seem the primary beneficiaries of manual 
  path normalization; Android app-private is subpar for content
  storage, as it's accessible to just one app.

For runs of this script captured in this folder: 
- debug1/2 were True/False  on macOS to force loop only
- debug1/2 were True/True   on Android shared storage for loop+mods
- debug1/2 were False/False on Windows and Linux: not required
- debug1/2 were False/False on Android app-specific to skip loop
- debug1/2 were False/False on Android app-private: not required

Comment: NO platforms should auto-normalize filenames; this can
lead to duplicates in syncs that aren't as forgiving as Mergeall.
"""

# before runs in Windows console: set PYTHONIOENCODING=utf8

import os, glob, shutil, time, sys
from os import system as cmd    

# Windows, Android, macOS, Linux: per-platform location of the Mergeall 3.3
# install folder and the command used to launch Python 3 in shell commands
if sys.platform.startswith('win'):
    mergeall33 = r'C:\Users\lutz\Desktop\temp\mergeall-oct0922'
    python3 = 'py -3'

elif any(key.startswith("ANDROID") for key in os.environ.keys()):
    # detected by environment variables, and checked before the Linux branch
    mergeall33 = r'/sdcard/work/mergeall-oct0922'
    python3 = 'python3'

elif sys.platform.startswith('darwin'):
    mergeall33 = '/Users/me/MY-STUFF/Code/mergeall'    # 3.3 dev tree
    python3 = 'python3'

elif sys.platform.startswith('linux'):
    mergeall33 = '/home/me/Desktop/temp/mergeall-oct0922'
    python3 = 'python3'

else:
    # raise (not assert) so the check also fires under python -O
    raise AssertionError('Not testing here')

# start each run with an empty LOGS folder: every step redirects its 
# output to a file in LOGS, so the folder must exist before any run
if os.path.exists('LOGS'):
    shutil.rmtree('LOGS')
os.mkdir('LOGS')

# for alt-form filenames: {nfc, nfd} Liñux
nfc = b'Li\xc3\xb1ux'.decode('utf-8')          # decoded str, composed (NFC)
nfd = b'Lin\xcc\x83ux'.decode('utf-8')         # decoded str, decomposed (NFD)

def populate():
    """
    Make mixed-Unicode-name folders+files test data, anew.

    Rebuilds FROM and TO in the CWD from scratch.  Each gets one deeply 
    nested folder path whose "Liñux<digit>" components mix the composed 
    (nfc) and decomposed (nfd) forms differently in FROM and TO, plus 
    two files in the deepest folder: one named with a Unicode variant 
    (nfc in FROM, nfd in TO) and one named 'plain.txt'.
    """

    def mkfile(name, content):                 # name is NFC|NFD, decoded
        # encoding applies to the content, not the name; 'with' 
        # guarantees the text is flushed and the file closed
        with open(name, 'w', encoding='utf8') as f:
            f.write(content)

    path1 = [nfc+'1', 'aaa', nfd+'2', 'bbb', nfd+'3', 'ccc', nfc+'4', 'ddd']
    path2 = [nfd+'1', 'aaa', nfd+'2', 'bbb', nfc+'3', 'ccc', nfc+'4', 'ddd']

    file1 = nfc
    file2 = nfd

    # in portable relative paths
    for (folder, path, file) in [('FROM', path1, file1), ('TO', path2, file2)]:
        if os.path.exists(folder): 
            shutil.rmtree(folder)              # drop any prior run's tree
        fullpath = os.path.join(folder, *path)
        os.makedirs(fullpath)                  # all nested folders at once
        for newfile in (file, 'plain.txt'):
            fullfile = os.path.join(fullpath, newfile)
            raw = newfile.encode('utf-8')
            # content records both the decoded name and its UTF-8 bytes
            mkfile(fullfile, content='%s, %s' % (newfile, raw))

def dump(label, deltas=False):
    """
    Display test data's state to verify populate and syncs.

    Prints the uppercased label, then for FROM and TO (in the CWD): the 
    deepest stored folder path as decoded text, the same path as a list
    of UTF-8 encoded components, and each file in that folder along with 
    its encoded name and raw content.  TO lines are indented two spaces 
    so they line up visually under FROM lines.

    label:  heading text for this dump.
    deltas: if True, also print the content of DELTAS/__added__.txt.
    """

    def storedpath(root):
        "get the full path stored under root, skip __bkp__"
        path = root                            # fallback if the walk yields nothing
        for (dirhere, subshere, fileshere) in os.walk(root):
            path = dirhere                     # last folder visited wins
            # prune in place, so the top-down walk never descends into it
            if '__bkp__' in subshere: subshere.remove('__bkp__')
        return path

    print('\n' + label.upper())

    # walk each tree once, and reuse the result in every section below
    paths = [(folder, storedpath(folder)) for folder in ('FROM', 'TO')]

    print('\n<decoded paths>')
    for (folder, path) in paths:
        print('  ' + path if folder == 'TO' else path)

    print('\n<encoded paths>')
    for (folder, path) in paths:
        print([part.encode('utf8') for part in path.split(os.sep)])

    for (folder, path) in paths:
        for file in os.listdir(path):
            rawfile = file.encode('utf8')
            filepath = os.path.join(path, file)
            with open(filepath, 'rb') as opened:       # close promptly
                content = opened.read()
            print('  ' + filepath if folder == 'TO' else filepath, 
                  '-->', rawfile, 
                  '==>', content)

    if deltas:
        with open('DELTAS' + os.sep + '__added__.txt', 'rb') as opened:
            added = opened.read()
        print(added.decode('utf8'), end='')

def compare(label):
    """
    Compare FROM and TO with mergeall (modtimes+structure) and 
    diffall (bytewise), saving each tool's output in LOGS.

    label: prefix for the two log files written, 
           LOGS/<label>-mergeall-f-t.txt and LOGS/<label>-diffall-f-t.txt.

    Doesn't use -quiet here or in deltas tests, so the logs show 
    Unicode normalization messages.
    """
    # report-only mergeall run: lists differences without changing trees
    cmd(python3 + ' %s/mergeall.py FROM TO -report -skipcruft'
             ' > LOGS/%s-mergeall-f-t.txt' % (mergeall33, label))

    # the diffall script must be named explicitly here: a mergeall run 
    # without -report would not be a read-only comparison
    cmd(python3 + ' %s/diffall.py FROM TO -skipcruft'
             ' > LOGS/%s-diffall-f-t.txt' % (mergeall33, label))

    # TMI: the reverse (TO FROM) comparisons are intentionally not run

def modify(folder='FROM', delay=3):
    """
    Modify data in folder (FROM), to create diffs with TO (to sync).

    Every 'plain.txt' in the tree is overwritten with new content, which
    changes both its modtime and size.  Every other file is deleted, which 
    leaves its counterpart in TO unique there.

    folder: root of the tree to change.
    delay:  seconds to sleep first, so new modtimes fall outside the 
            2-second FAT tolerance range; pass 0 to skip the wait.
    """
    time.sleep(delay)                             # evade FAT 2 seconds tolerance range

    for (dirhere, subshere, fileshere) in os.walk(folder):
        for file in fileshere:
            filepath = os.path.join(dirhere, file)
            if file == 'plain.txt':
                with open(filepath, 'wb') as new:     # closed+flushed on exit
                    new.write(b'modified...')         # change modtime+size
            else:
                os.remove(filepath)               # make unique TO => __added__.txt entry

def sync():
    """
    Sync trees with mergeall directly/immediately.

    Runs an automatic (-auto) FROM => TO mergeall update and saves 
    its output in LOGS/3-sync-mergeall-33.txt.
    """
    # the script file is named explicitly: the install folder alone
    # is not a runnable program
    cmd(python3 + ' %s/mergeall.py FROM TO -auto -skipcruft'
             ' > LOGS/3-sync-mergeall-33.txt' % mergeall33)

def make_to_path_differ(dodump=False):
    """
    Flip the Unicode form of every "Liñux?" item in TO's tree.

    Each item in TO whose name is nfc or nfd plus one trailing 
    character is renamed to the other form, keeping that character.
    This makes TO's path differ from the prior TO path saved in 
    __added__.txt, at least on platforms that do not auto-normalize.

    That suffices to skew __added__.txt and TO for path existence 
    tests on Windows and Linux, but not on Android or macOS, where 
    filenames are auto-normalized and code debug flags can force loop
    tests (see How Tested in the top-of-file docstring).

    On Windows and Linux, all "Liñux?" items are normalized as 
    expected.  The final path name is not: it has no digit suffix, 
    so it is not renamed here, and skipping renames for some folders 
    also skips normalization for them.  This creates the state 
    addressed by Mergeall path normalization; it's complex to generate 
    artificially, and has yet to be spotted in the wild (so far...).

    dodump: if True, display the resulting state with dump().
    """
    flipped = {nfc: nfd, nfd: nfc}             # each form maps to the other

    # bottom-up, so an item is renamed before the folder containing it
    for (here, subdirs, filenames) in os.walk('TO', topdown=False):
        for entry in subdirs + filenames:
            stem = entry[:-1]
            if stem not in flipped:
                continue                       # not a "Liñux?" name: leave as is
            renamed = flipped[stem] + entry[-1]
            os.rename(os.path.join(here, entry), os.path.join(here, renamed))

    if dodump:
        dump('post make to path differ')

def deltas_create(num):
    """
    Save FROM deltas, including unique TO in __added__.txt.

    Runs Mergeall's deltas script to write the FROM/TO changes to
    the DELTAS folder, and saves its output in LOGS.

    num: prefix for the log file, LOGS/<num>-deltas-create-33.txt.
    """
    # NOTE(review): script name restored as deltas.py, which is assumed 
    # to be Mergeall 3.3's deltas creator - confirm against the install
    cmd(python3 + ' %s/deltas.py DELTAS FROM TO -skipcruft'
             ' > LOGS/%s-deltas-create-33.txt' % (mergeall33, num))

def deltas_apply(num):
    """
    Apply FROM deltas to TO, with relative paths.

    Runs mergeall on the DELTAS folder in -restore mode with 
    automatic updates and backups, and saves its output in LOGS;
    -quiet is deliberately left off so messages appear in the log.

    num: prefix for the log file, LOGS/<num>-deltas-apply-33.txt.
    """
    cmd(python3 + ' %s/mergeall.py DELTAS TO -restore -auto -backup -skipcruft'   # -quiet
            ' > LOGS/%s-deltas-apply-33.txt' % (mergeall33, num))

def deltas_apply_abspath(num):
    """
    Apply FROM deltas to TO, with absolute paths.

    Same as deltas_apply(), but passes the absolute forms of the 
    DELTAS and TO folders in the CWD; -quiet is deliberately left 
    off so messages appear in the log.

    num: prefix for the log file, LOGS/<num>-deltas-apply-33.txt.
    """
    absde = os.path.abspath('DELTAS')
    absto = os.path.abspath('TO')

    # absolute paths depend on where the test runs: double-quote 
    # them so a CWD containing spaces stays a single shell argument
    cmd(python3 + ' %s/mergeall.py "%s" "%s" -restore -auto -backup -skipcruft'   # -quiet
            ' > LOGS/%s-deltas-apply-33.txt' % (mergeall33, absde, absto, num))

if __name__ == '__main__':
    # go: run the test steps that each prompt and dump announces
    #
    # NOTE(review): the step calls below were reconstructed from the 
    # prompts, the helper functions, and the numbered log names (sync()
    # writes log 3); the exact original order and labels should be 
    # confirmed against the saved LOGS of a prior run

    def rebuild_for_deltas():
        # fresh trees with diffs, and no deltas set left from a prior step
        populate()
        modify()
        if os.path.exists('DELTAS'):
            shutil.rmtree('DELTAS')

    populate()
    compare('1-initial')
    dump('initial populate')

    input('\nPress enter to mod and sync')
    modify()
    compare('2-modified')
    sync()                                   # writes LOGS/3-sync-mergeall-33.txt
    compare('4-synced')
    dump('post mod and sync')

    input('\nPress enter to run deltas sync')
    rebuild_for_deltas()
    deltas_create(5)                         # saves unique TO path in __added__.txt
    make_to_path_differ()                    # then skew TO's path vs the saved one
    deltas_apply(6)
    compare('7-deltas')
    dump('post deltas sync', deltas=True)

    input('\nPress enter to run deltas abspath sync')
    rebuild_for_deltas()
    deltas_create(8)
    make_to_path_differ()
    deltas_apply_abspath(9)
    compare('10-deltas-abspath')
    dump('post deltas abspath sync', deltas=True)

