User:Inductiveload/Scripts/Image splitter

Script to split a directory of images into halves, useful when books have been scanned as two-page spreads. The image filenames should end in a sequential page number followed by the extension, e.g. "*0001.ext", "*0002.ext", and so on.

Parameters

The script does not take any command line parameters, but it does have some internal parameters you should set before running the script.

  • DIR: the image directory to process
  • SPLIT_POINT: the proportion of the image horizontally to split at. 0.5 for evenly proportioned pages.
  • OVERLAP: proportion of the image width to overlap, useful when the images don't have a perfectly consistent split point.
  • EXCLUDE_PAGES: list of pages that do not need to be split.

Source code

#!/usr/bin/env python

import os
import re
import Image

# Directory whose files are processed; every regular file in it is passed to split().
DIR = '/media/MJ/wiki/melb_riots'
# Horizontal split position as a fraction of the image width (0.5 = centre).
SPLIT_POINT = 0.50 #proportion of width to split at
# Fraction of the image width by which each half extends past the split point,
# so the two halves share a strip of 2 * OVERLAP of the width.
OVERLAP = 0.01

# Page numbers (as parsed from the filename) that are skipped rather than split.
EXCLUDE_PAGES = [ 0 ] 

def _parse_page_name(filename):
    """Break an image path into (prefix, page number, extension).

    Only the final path component is inspected, so digits or dots in the
    directory part of the path cannot be mistaken for the page number.
    The page number is the run of digits immediately before the extension.

    Returns None when the name has no extension or no trailing digits.
    The returned prefix includes the directory, so
    prefix + '%04d' % number + ext is a complete path again.
    """
    directory, basename = os.path.split(filename)
    stem, ext = os.path.splitext(basename)

    m = re.match(r'^(.*?)(\d+)$', stem)
    if not m or not ext:
        return None

    return os.path.join(directory, m.group(1)), int(m.group(2)), ext


def split(filename):
    """Split one two-page scan into a left and a right page image.

    The file name must look like '<prefix><number><ext>'.  Page N is written
    out as pages 2N-1 (left half) and 2N (right half), zero-padded to four
    digits, next to the input file.  The left half can therefore have the same
    name as the input (e.g. page 1) and replace it.

    The split position and the overlap between the halves come from the
    module-level SPLIT_POINT and OVERLAP; pages listed in EXCLUDE_PAGES are
    left untouched.

    NOTE(review): an excluded page keeps its original name, so it can still be
    overwritten by the output of a lower-numbered page whose 2N-1 or 2N equals
    the excluded number.

    Always returns None; progress and problems are reported with print().
    """
    print('(INF): Attempting to split file: %s'% filename)

    parsed = _parse_page_name(filename)
    if parsed is None:
        print('\t(ERR): File numbering not found: %s'% filename)
        return None
    prefix, number, ext = parsed

    if number in EXCLUDE_PAGES:
        print('\t(INF): Skipping page: %d'% number)
        return None

    if ext.lower() not in ('.jpg', '.jpeg', '.gif', '.png', '.tif', '.tiff', '.bmp'):
        print('\t(ERR): File type not splittable: %s'% ext)
        return None

    image = Image.open(filename)
    width, height = image.size

    # Each half reaches OVERLAP past the split point.  Clamp to the image so
    # that unusual SPLIT_POINT/OVERLAP values never produce a crop box that
    # lies outside the picture.
    left_end = max(0, min(width, int(width * (SPLIT_POINT + OVERLAP))))
    right_start = max(0, min(width, int(width * (SPLIT_POINT - OVERLAP))))

    # Crop boxes are (left, upper, right, lower).
    left_image = image.crop((0, 0, left_end, height))
    right_image = image.crop((right_start, 0, width, height))

    # Page N of the spreads becomes pages 2N-1 and 2N of the split book.
    number = number * 2
    left_image.save('%s%04d%s' % (prefix, number - 1, ext))
    right_image.save('%s%04d%s' % (prefix, number, ext))

def run():
    """Split every regular file in DIR, going through the names in reverse sorted order."""

    # The directory is listed once, up front, so files written while
    # splitting are not picked up again in the same run.
    names = sorted(os.listdir(DIR))

    # Highest names first: splitting a page writes files with higher page
    # numbers, so going backwards avoids overwriting inputs that have not
    # been processed yet.
    for name in reversed(names):
        path = os.path.join(DIR, name)

        if not os.path.isfile(path):
            continue  # skip sub-directories and other non-files

        split(path)


# Process DIR only when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    run()