source: noteshrink/trunk/fuentes/noteshrink.py @ 5999

Last change on this file since 5999 was 5999, checked in by kbut, 3 years ago

add package

  • Property svn:executable set to *
File size: 16.5 KB
Line 
1#!/usr/bin/env python
2
3'''Converts sequence of images to compact PDF while removing speckles,
4bleedthrough, etc.
5
6'''
7
8# for some reason pylint complains about members being undefined :(
9# pylint: disable=E1101
10
11from __future__ import print_function
12
13import sys
14import os
15import re
16import subprocess
17import shlex
18
19from argparse import ArgumentParser
20
21import numpy as np
22from PIL import Image
23from scipy.cluster.vq import kmeans, vq
24
25######################################################################
26
def quantize(image, bits_per_channel=None):

    '''Snap every channel value to the middle of its quantization bin.

    image must be a uint8 numpy array (this is asserted).
    bits_per_channel is the number of high-order bits kept for each
    channel; None selects 6. Returns a new integer array with the same
    shape as image; the input is not modified.

    '''

    bits = 6 if bits_per_channel is None else bits_per_channel

    assert image.dtype == np.uint8

    dropped_bits = 8 - bits
    half_bin = (1 << dropped_bits) >> 1

    # astype() hands back a fresh array, so the in-place operations
    # below never touch the caller's data.
    binned = image.astype(int)
    binned >>= dropped_bits
    binned <<= dropped_bits
    binned += half_bin

    return binned
40
41######################################################################
42
def pack_rgb(rgb):

    '''Combine R, G and B components into one integer per color, laid
out as (R << 16) | (G << 8) | B.

    rgb is either a numpy array whose last axis has length 3, or a
    plain 3-element sequence. For an array the result has the input's
    shape minus the last axis; for a plain sequence the result is a
    one-element 1-D array.

    '''

    is_array = isinstance(rgb, np.ndarray)

    if is_array:
        assert rgb.shape[-1] == 3
        leading_shape = rgb.shape[:-1]
    else:
        assert len(rgb) == 3
        rgb = np.array(rgb)

    # Work on a flat list of colors so one expression covers any rank.
    channels = rgb.astype(int).reshape((-1, 3))
    red = channels[:, 0]
    green = channels[:, 1]
    blue = channels[:, 2]

    packed = (red << 16) | (green << 8) | blue

    if is_array:
        return packed.reshape(leading_shape)

    return packed
67
68######################################################################
69
def unpack_rgb(packed):

    '''Split packed 24-bit colors back into their R, G, B components.

    A non-array argument (e.g. a single integer) yields a 3-tuple
    (r, g, b). A numpy array, which must have the platform integer
    dtype (asserted), yields an array of the same shape with an extra
    trailing axis of length 3.

    '''

    is_array = isinstance(packed, np.ndarray)

    if is_array:
        assert packed.dtype == int

    # Red sits in bits 16-23, green in bits 8-15, blue in bits 0-7.
    channels = tuple((packed >> shift) & 0xff for shift in (16, 8, 0))

    if not is_array:
        return channels

    return np.stack(channels, axis=-1)
92
93######################################################################
94
def get_bg_color(image, bits_per_channel=None):

    '''Estimate the background color of an image (or array of RGB
colors) as its most frequent color after quantization.

    The colors are quantized with quantize() so that similar shades
    share a bin, packed into single integers, and the most common
    packed value is unpacked and returned as an (r, g, b) tuple.

    '''

    assert image.shape[-1] == 3

    binned = quantize(image, bits_per_channel).astype(int)

    codes, frequencies = np.unique(pack_rgb(binned), return_counts=True)
    most_common = codes[frequencies.argmax()]

    return unpack_rgb(most_common)
113
114######################################################################
115
def rgb_to_sv(rgb):

    '''Convert an RGB image or array of RGB colors to saturation and
value (the S and V of HSV), returned as two separate results.

    rgb may be a numpy array or any sequence np.array() accepts; the
    last axis holds the color components. For each color:

      * saturation is (max - min) / max, defined as 0 where max is 0;
      * value is max / 255.

    Returns the tuple (saturation, value); each has the shape of rgb
    without its last axis.

    '''

    if not isinstance(rgb, np.ndarray):
        rgb = np.array(rgb)

    cmax = rgb.max(axis=-1).astype(np.float32)
    cmin = rgb.min(axis=-1).astype(np.float32)
    delta = cmax - cmin

    # Pure black has cmax == 0, so the division below is 0/0 there.
    # Those entries are overwritten with 0 right after, so silence the
    # floating point warning instead of emitting one per call.
    with np.errstate(divide='ignore', invalid='ignore'):
        saturation = delta / cmax

    saturation = np.where(cmax == 0, 0, saturation)

    value = cmax/255.0

    return saturation, value
138
139######################################################################
140
def postprocess(output_filename, options):

    '''Run the postprocessing command on the file provided.

    options.postprocess_cmd is a command template in which %i stands
    for output_filename, %o for the postprocessed filename
    (output_filename with its extension replaced by
    options.postprocess_ext) and %e for options.postprocess_ext. Any
    existing file at the postprocessed filename is removed first.

    Returns the postprocessed filename on success. If the command
    cannot be started, exits with a nonzero status, or either file
    cannot be stat'ed afterwards, a warning is written to stderr and
    None is returned.

    '''

    assert options.postprocess_cmd

    base, _ = os.path.splitext(output_filename)
    post_filename = base + options.postprocess_ext

    def substitute(text):
        '''Expand the %i, %o and %e placeholders in text.'''
        text = text.replace('%i', output_filename)
        text = text.replace('%o', post_filename)
        return text.replace('%e', options.postprocess_ext)

    # Split the template into arguments *before* substituting, so that
    # a filename containing whitespace or quotes stays one argument.
    subprocess_args = [substitute(token)
                       for token in shlex.split(options.postprocess_cmd)]

    if os.path.exists(post_filename):
        os.unlink(post_filename)

    if not options.quiet:
        print('  running "{}"...'.format(
            substitute(options.postprocess_cmd)), end=' ')
        sys.stdout.flush()

    try:
        result = subprocess.call(subprocess_args)
        before = os.stat(output_filename).st_size
        after = os.stat(post_filename).st_size
    except OSError:
        # Command missing/not executable, or an expected file is absent.
        result = -1

    if result != 0:
        sys.stderr.write('warning: postprocessing failed!\n')
        return None

    if not options.quiet:
        # Guard against a zero-byte input to avoid dividing by zero.
        reduction = 100*(1.0-float(after)/before) if before else 0.0
        print('{:.1f}% reduction'.format(reduction))

    return post_filename
183
184######################################################################
185
def percent(string):
    '''Interpret a percentage (e.g. "85") as a fraction (e.g. 0.85).'''
    number = float(string)
    return number/100.0
189
190######################################################################
191
def get_argument_parser():

    '''Build and return the ArgumentParser describing this program's
command-line interface. Parsing itself is left to the caller.

    Percentage options (-v, -s, -p) are converted to fractions by
    percent(); -O, -C and -Q are presets that store a fixed command in
    the same destination as -P.

    '''

    parser = ArgumentParser(
        description='convert scanned, hand-written notes to PDF')
    add = parser.add_argument

    def with_default(text):
        '''Append argparse's default-value placeholder to a help text.'''
        return text + ' (default %(default)s)'

    # Input files.
    add('filenames', nargs='+', metavar='IMAGE',
        help='files to convert')

    # Output naming and verbosity.
    add('-q', action='store_true', default=False, dest='quiet',
        help='reduce program output')
    add('-b', default='page', dest='basename', metavar='BASENAME',
        help=with_default('output PNG filename base'))
    add('-o', default='output.pdf', dest='pdfname', metavar='PDF',
        help=with_default('output PDF filename'))

    # Palette extraction tuning.
    add('-v', type=percent, default='25', dest='value_threshold',
        metavar='PERCENT',
        help=with_default('background value threshold %%'))
    add('-s', type=percent, default='20', dest='sat_threshold',
        metavar='PERCENT',
        help=with_default('background saturation threshold %%'))
    add('-n', type=int, default='8', dest='num_colors',
        help=with_default('number of output colors '))
    add('-p', type=percent, default='5', dest='sample_fraction',
        metavar='PERCENT',
        help=with_default('%% of pixels to sample'))

    # Boolean switches.
    add('-w', action='store_true', default=False, dest='white_bg',
        help='make background white')
    add('-g', action='store_true', default=False, dest='global_palette',
        help='use one global palette for all pages')
    add('-S', action='store_false', default=True, dest='saturate',
        help='do not saturate colors')
    add('-K', action='store_false', default=True,
        dest='sort_numerically',
        help=('keep filenames ordered as specified; '
              'use if you *really* want IMG_10.png to '
              'precede IMG_2.png'))

    # Postprocessing of each PNG.
    add('-P', default=None, dest='postprocess_cmd',
        help='set postprocessing command (see -O, -C, -Q)')
    add('-e', default='_post.png', dest='postprocess_ext',
        help='filename suffix/extension for postprocessing command')

    presets = (('-O', 'optipng -silent %i -out %o'),
               ('-C', 'pngcrush -q %i %o'),
               ('-Q', 'pngquant --ext %e %i'))

    for flag, preset_cmd in presets:
        add(flag, action='store_const', const=preset_cmd,
            dest='postprocess_cmd',
            help='same as -P "%(const)s"')

    # Final PDF assembly.
    add('-c', default='convert %i %o', dest='pdf_cmd', metavar="COMMAND",
        help='PDF command (default "%(default)s")')

    return parser
278
279######################################################################
280
def get_filenames(options):

    '''Return the input filenames, by default ordered by the last
number found in each file's base name (extension excluded), so that
IMG_10.png comes after IMG_9.png. Scanner software often numbers
files without leading zeros, and this keeps pages in order even when
the files are supplied through a shell wildcard.

    Names without any digits sort first; names sharing the same
    number are ordered by their full filename. When
    options.sort_numerically is false, options.filenames is returned
    unchanged.

    '''

    if not options.sort_numerically:
        return options.filenames

    def page_order(path):
        '''Sort key: (trailing number or -1, original path).'''
        stem, _ = os.path.splitext(os.path.basename(path))
        digit_runs = re.findall(r'[0-9]+', stem)
        number = int(digit_runs[-1]) if digit_runs else -1
        return (number, path)

    return sorted(options.filenames, key=page_order)
308
309######################################################################
310
def load(input_filename):

    '''Load an image with Pillow and convert it to a numpy array.

    Returns (img, dpi): img is the image converted to RGB as a numpy
    array, and dpi is the value stored under 'dpi' in the image's
    info dictionary, or (300, 300) when the file does not record one.

    If the file cannot be opened or decoded, a warning is written to
    stderr and (None, None) is returned.

    '''

    try:
        pil_img = Image.open(input_filename)

        # Image.open() only identifies the file; pixel data is read
        # lazily by the calls below. Keep them inside the try block so
        # that a truncated or corrupt file is reported like any other
        # unreadable input instead of raising out of this function.
        if pil_img.mode != 'RGB':
            pil_img = pil_img.convert('RGB')

        dpi = pil_img.info.get('dpi', (300, 300))

        img = np.array(pil_img)
    except IOError:
        sys.stderr.write('warning: error opening {}\n'.format(
            input_filename))
        return None, None

    return img, dpi
334
335######################################################################
336
def sample_pixels(img, options):

    '''Return a random subset of the image's pixels, in random order.

    The image is viewed as a flat list of RGB pixels and
    int(num_pixels * options.sample_fraction) of them are drawn
    without repetition.

    '''

    flat = img.reshape((-1, 3))
    total = flat.shape[0]
    wanted = int(total*options.sample_fraction)

    # Shuffle every pixel index, then keep the first `wanted` of them.
    order = np.arange(total)
    np.random.shuffle(order)
    chosen = order[:wanted]

    return flat[chosen]
350
351######################################################################
352
def get_fg_mask(bg_color, samples, options):

    '''Classify each sample as foreground or background.

    A sample counts as foreground when its value differs from the
    background's by at least options.value_threshold, or its
    saturation differs by at least options.sat_threshold. Returns a
    boolean mask that is True for foreground samples.

    '''

    bg_saturation, bg_value = rgb_to_sv(bg_color)
    saturation, value = rgb_to_sv(samples)

    value_differs = np.abs(bg_value - value) >= options.value_threshold
    sat_differs = np.abs(bg_saturation - saturation) >= options.sat_threshold

    return value_differs | sat_differs
368
369######################################################################
370
def get_palette(samples, options, return_mask=False, kmeans_iter=40):

    '''Build the color palette for a set of sampled RGB values.

    Entry 0 of the palette is the background color (found with 6 bits
    per channel); the remaining entries are cluster centers obtained
    by running K-means with options.num_colors - 1 clusters on the
    samples classified as foreground. kmeans_iter is passed to
    kmeans() as its iter argument.

    Returns the palette as a uint8 array, or the pair
    (palette, foreground mask) when return_mask is true.

    '''

    if not options.quiet:
        print('  getting palette...')

    background = get_bg_color(samples, 6)
    is_foreground = get_fg_mask(background, samples, options)

    fg_samples = samples[is_foreground].astype(np.float32)
    num_clusters = options.num_colors-1

    centers, _ = kmeans(fg_samples, num_clusters, iter=kmeans_iter)

    palette = np.vstack((background, centers)).astype(np.uint8)

    if return_mask:
        return palette, is_foreground

    return palette
397
398######################################################################
399
def apply_palette(img, palette, options):

    '''Map every pixel of the image to an index into the palette.

    Pixels classified as background (relative to palette[0]) get
    index 0; every foreground pixel gets the index of the palette
    entry closest to it, found with vq(). Returns a uint8 array of
    indices with the image's shape minus the color axis.

    '''

    if not options.quiet:
        print('  applying palette...')

    is_foreground = get_fg_mask(palette[0], img, options).flatten()

    flat_pixels = img.reshape((-1, 3))

    # Start with every pixel on the background entry, then overwrite
    # the foreground ones with their nearest palette index.
    labels = np.zeros(flat_pixels.shape[0], dtype=np.uint8)

    nearest, _ = vq(flat_pixels[is_foreground], palette)
    labels[is_foreground] = nearest

    return labels.reshape(img.shape[:-1])
428
429######################################################################
430
def save(output_filename, labels, palette, dpi, options):

    '''Write the label/palette pair to output_filename as an indexed
(mode 'P') image, recording dpi in the file.

    When options.saturate is set, the palette is stretched so that its
    smallest component maps to 0 and its largest to 255. When
    options.white_bg is set, palette entry 0 (the background) is
    replaced by pure white. The caller's palette array is not
    modified.

    '''

    if not options.quiet:
        print('  saving {}...'.format(output_filename))

    if options.saturate:
        as_float = palette.astype(np.float32)
        lowest = as_float.min()
        highest = as_float.max()
        stretched = 255 * (as_float - lowest)/(highest-lowest)
        palette = stretched.astype(np.uint8)

    if options.white_bg:
        # Copy first so the caller's array is left untouched.
        palette = palette.copy()
        palette[0] = (255, 255, 255)

    indexed_img = Image.fromarray(labels, 'P')
    indexed_img.putpalette(palette.flatten())
    indexed_img.save(output_filename, dpi=dpi)
457
458######################################################################
459
def get_global_palette(filenames, options):

    '''Compute one palette shared by a series of input files by merging
pixel samples from all of them into one large array.

    Each file that loads successfully contributes its sampled pixels,
    trimmed to 1/N of its samples (N being the number of loaded
    files), so the merged array is about as large as the sample set
    of a single page.

    Returns (input_filenames, global_palette), where input_filenames
    lists only the files that could be loaded. If none could be
    loaded, a warning is written to stderr and ([], None) is
    returned.

    '''

    input_filenames = []

    all_samples = []

    if not options.quiet:
        print('building global palette...')

    for input_filename in filenames:

        img, _ = load(input_filename)
        if img is None:
            continue

        if not options.quiet:
            print('  processing {}...'.format(input_filename))

        samples = sample_pixels(img, options)
        input_filenames.append(input_filename)
        all_samples.append(samples)

    if not all_samples:
        # Nothing was readable: np.vstack() would raise on an empty
        # sequence, and there is no data to build a palette from.
        sys.stderr.write('warning: no input images could be loaded, '
                         'global palette not built\n')
        return input_filenames, None

    num_inputs = len(input_filenames)

    all_samples = [s[:int(round(float(s.shape[0])/num_inputs))]
                   for s in all_samples]

    all_samples = np.vstack(tuple(all_samples))

    global_palette = get_palette(all_samples, options)

    if not options.quiet:
        print('  done\n')

    return input_filenames, global_palette
500
501######################################################################
502
def emit_pdf(outputs, options):

    '''Run the PDF conversion command to generate the PDF.

    options.pdf_cmd is a command template in which %o stands for
    options.pdfname and %i for the list of page images in outputs. A
    stand-alone %i argument expands to one argument per page; a %i
    embedded in a larger argument is replaced by the space-separated
    page list within that single argument.

    Nothing is returned. If the command cannot be started or exits
    with a nonzero status, a warning is written to stderr.

    '''

    all_pages = ' '.join(outputs)

    # Only the first two pages are shown in the progress message.
    if len(outputs) > 2:
        shown_pages = ' '.join(outputs[:2] + ['...'])
    else:
        shown_pages = all_pages

    cmd_print = options.pdf_cmd.replace('%o', options.pdfname)
    cmd_print = cmd_print.replace('%i', shown_pages)

    if not options.quiet:
        print('running PDF command "{}"...'.format(cmd_print))

    # Split the template into arguments *before* substituting, so that
    # page or PDF names containing whitespace stay single arguments.
    subprocess_args = []

    for token in shlex.split(options.pdf_cmd):
        if token == '%i':
            subprocess_args.extend(outputs)
        else:
            token = token.replace('%o', options.pdfname)
            subprocess_args.append(token.replace('%i', all_pages))

    try:
        result = subprocess.call(subprocess_args)
    except OSError:
        # The command could not be started at all.
        result = -1

    if result == 0:
        if not options.quiet:
            print('  wrote', options.pdfname)
    else:
        sys.stderr.write('warning: PDF command failed\n')
528
529######################################################################
530
def notescan_main(options):

    '''Run the whole pipeline for already-parsed options.

    Every input image that loads is reduced to a small palette, saved
    as an indexed PNG named <basename><NNNN>.png, optionally
    postprocessed, and finally all pages are handed to the PDF
    command. With options.global_palette and more than one input, a
    single palette is shared by all pages; otherwise each page gets
    its own.

    '''

    filenames = get_filenames(options)

    use_global_palette = options.global_palette and len(filenames) > 1

    if use_global_palette:
        filenames, palette = get_global_palette(filenames, options)

    run_postprocess = bool(options.postprocess_cmd)

    outputs = []

    for input_filename in filenames:

        img, dpi = load(input_filename)

        # Unreadable inputs are skipped; load() already warned.
        if img is None:
            continue

        # Pages are numbered by how many have been written so far.
        page_filename = '{}{:04d}.png'.format(
            options.basename, len(outputs))

        if not options.quiet:
            print('opened', input_filename)

        if not use_global_palette:
            palette = get_palette(sample_pixels(img, options), options)

        labels = apply_palette(img, palette, options)

        save(page_filename, labels, palette, dpi, options)

        if run_postprocess:
            improved_filename = postprocess(page_filename, options)
            if improved_filename:
                page_filename = improved_filename
            else:
                # One failure disables postprocessing for later pages.
                run_postprocess = False

        outputs.append(page_filename)

        if not options.quiet:
            print('  done\n')

    emit_pdf(outputs, options)
579
580######################################################################
581
def main():
    '''Command-line entry point: parse the arguments, then hand the
    resulting options to notescan_main().'''
    parser = get_argument_parser()
    options = parser.parse_args()
    notescan_main(options=options)

if __name__ == '__main__':
    main()
Note: See TracBrowser for help on using the repository browser.