[Coco] [Color Computer] Re: pdf to djvu conversion solved

Jeff Teunissen deek at d2dc.net
Tue May 19 18:18:07 EDT 2009


stinger30au wrote:
> --- In ColorComputer at yahoogroups.com, "stinger30au" <stinger30au at ...> wrote:
>>
>> ok,
>> just did another test. same file but i increased the dpi from 300 to 400 for the djvu file
>> 
>> the file size for the djvu file jumped from 16 gig to 25 gig but *MAN* the text quality is spot on now. no more blurry text.
>> 
>> yippie.
>> 
>> not bad from my original pdf of 96 megabyte
>> 
>> so if anyone wants to go nuts converting pdf files to djvu files please do so, but do it at 400 dpi not 300. 

Here is a quick copy of the scripts I'm using to convert PDF to DjVu. It's
done on a page-by-page basis, so that I can have a little more control over
what's going into the foreground and background chunks. On the first pass, I
set "thresh" to max_frac (3) in the python file and run "shrinkpdf2 *.jpg",
then look at each of the djvu files that were created. I take note of the
pages that are screwed up, then start tweaking for them (leaving the pages
that were OK alone).

The resulting files are only a little smaller than what's produced by the
Lizardtech program (and not as good), but they're quite a bit smaller than
pdf2djvu's output and the quality is much better (yes, they're both smaller
AND better).

For those who are interested in improving the process, I believe that after
splitting up the channels, using the average color and the standard deviation
of each channel might help in getting even smaller files and higher quality.
But it's just a hunch.

~/bin/shrinkpdf2:

#! /bin/bash

# Stop at the first command that fails instead of carrying on with
# missing or half-written intermediate files.
set -e

# Name this script was invoked as.  "$0" is quoted so that an install path
# containing whitespace does not make basename fail (which, with set -e,
# would abort the script before it does anything).
me=$(basename "$0")

# setsize WIDTH HEIGHT
# Store the two arguments in the global variables "width" and "height".
setsize () {
      width=$1 height=$2
}

# pdftoppm PAGE INFILE OUTFILE
# Extract the images of a single page of INFILE with pdfimages, stack them
# vertically with ImageMagick's convert, and write the result to OUTFILE
# as a PPM with a maxval of 255.  The intermediate "PAGE-*" image files
# are created in, and removed from, the current directory.
function pdftoppm () {
      local page="$1"
      local infile="$2"
      local outfile="$3"

      # "$page" doubles as the image root, so pdfimages writes PAGE-NNN.* files.
      pdfimages -j -f "$page" -l "$page" "$infile" "$page"
      # Only the glob is left unquoted; the prefix is protected from
      # word splitting and accidental pattern expansion.
      convert "$page"-* -append ppm:- | pnmdepth 255 > "$outfile"
      rm "$page"-*
}

# makemask3 FILE
# Run GIMP in batch mode and call the python-fu-maskconvert-cmyk procedure
# on FILE, passing the global DPI and HQ settings.  The single-letter GIMP
# switches can be overridden through the GIMPOPTS environment variable;
# when it is unset or empty they default to "idf".
makemask3 () {
      : "${GIMPOPTS:=idf}"
      gimp -${GIMPOPTS}sb "(python-fu-maskconvert-cmyk RUN-NONINTERACTIVE \"$1\" $DPI $HQ)"
}

# makeparts FILE
# Encode the intermediate files produced for FILE (FILE.fgppm, FILE.bgppm,
# FILE.fgmask, FILE.bgmask and FILE.mask) into the three chunks that
# djvumake needs: FILE.fg, FILE.bg and FILE.jb2.
# A non-zero HQ selects a higher slice target for the foreground layer.
function makeparts () {
      fgquality=96
      # Quoted, and defaulted to 0, so an empty HQ cannot break the test.
      if [ "${HQ:-0}" -ne 0 ]; then
          fgquality=102
      fi
      c44 -slice $fgquality -mask "$1.fgmask" "$1.fgppm" "$1.fgtmp"
      c44 -slice 72+11+10+10 -mask "$1.bgmask" "$1.bgppm" "$1.bgtmp"

      # c44 writes complete DjVu files; pull out just the BG44 chunks.
      djvuextract "$1.fgtmp" "BG44=$1.fg"
      djvuextract "$1.bgtmp" "BG44=$1.bg"

      # Quoted like every other command here so that file names containing
      # whitespace reach cjb2 as single arguments.
      cjb2 -clean "$1.mask" "$1.jb2"
}

# delfiles FILE
# Remove every "FILE.*" intermediate file, unless the SAVE environment
# variable is set to a non-empty value.  Always reports success when the
# files are kept.
delfiles () {
      if [ -z "$SAVE" ]; then
          rm "$1".*
      fi
      return 0
}

# Keep the command-line arguments in an array: flattening "$@" into one
# string and re-splitting it breaks every file name that contains
# whitespace and re-expands any glob characters in the names.
files=("$@")
# Defaults for the settings that can be supplied through the environment.
: "${DPI:=300}"
: "${HQ:=0}"

for arg in "${files[@]}"
do
      # Reset on every pass so a previous file's extension never leaks in.
      ext=""
      case "$arg" in
          *.jpg)
              ext="jpg"
              ;;
          *.jpeg)
              ext="jpeg"
              ;;
          *.ppm)
              ext="ppm"
              ;;
          *.tif)
              ext="tif"
              ;;
          *.tiff)
              ext="tiff"
              ;;
      esac
      # Strip the directory and, for the known image types, the extension.
      if [ -n "$ext" ]; then
          base=$(basename "$arg" ".$ext")
      else
          base=$(basename "$arg")
      fi
      djvu="$base.djvu"
      echo "$arg => $djvu"
      makemask3 "$arg"
      makeparts "$arg"
      djvumake "$djvu" "INFO=,,$DPI" "Sjbz=$arg.jb2" "FG44=$arg.fg" "BG44=$arg.bg"
      # Optional per-page djvused script, applied and saved in place.
      if [ -f "$base.dsed" ]; then
          djvused "$djvu" -f "$base.dsed" -s
      fi
      delfiles "$arg"
done

.gimp-2.6/plug-ins/convert_to_cmyk_mask.py:

#! /usr/bin/env python

from gimpfu import *

import glob
import math
import os

global mask

def linear_fraction (frac, maximum=255):
      """Return frac scaled onto the range 0..maximum, rounded to a whole number."""
      full_scale = float(maximum)
      return round (frac * full_scale)
def scale_factor (dpi, wanted):
      """Return the whole-number reduction from dpi to wanted, limited to 1..12."""
      ratio = math.floor (dpi / float(wanted))
      if ratio <= 1:
          return 1
      if ratio > 12:
          return 12
      return ratio
def min_frac (denominator):
      """Return the fraction 1/denominator as a float."""
      divisor = float(denominator)
      return 1 / divisor
def max_frac (denominator):
      """Return the fraction (denominator - 1)/denominator as a float."""
      numerator = float(denominator - 1)
      return numerator / float(denominator)

def save (img, basename):
      """Flatten img and write it to the file named basename.

      A non-RGB (grayscale) image is treated as a mask: it is first saved as
      a PGM to basename + '.tmp', converted to a bitmap with the external
      pgmtopbm program, and the temporary file is removed.  An RGB image is
      saved directly as a PPM.

      As before, the exit status of pgmtopbm is not checked.  OSError is
      raised if pgmtopbm cannot be started at all.
      """
      import subprocess   # local import: only the mask branch needs it

      pdb.gimp_image_flatten (img)

      if not img.active_drawable.is_rgb:  # it's grayscale, a mask
          temp = basename + '.tmp'
          pdb.file_pgm_save (img, img.active_drawable, temp, temp, 1)
          # Run pgmtopbm with an argument list and redirect its output
          # ourselves rather than building a shell command line, so file
          # names with spaces or shell metacharacters are handled verbatim.
          out = open (basename, 'wb')
          try:
              subprocess.call (['pgmtopbm', temp], stdout=out)
          finally:
              out.close ()
              os.remove (temp)    # never leave the temporary PGM behind
      else:
          pdb.file_ppm_save (img, img.active_drawable, basename, basename, 1)

def scale_and_save_cmyk (name, img, factor, notFG):
      """Save a copy of img to the file name, reduced in size by factor.

      name   -- output file name, handed to save ()
      img    -- image to copy; compared against the global mask
      factor -- reduction factor; 1 means "do not scale"
      notFG  -- only used when img is the global mask:
                  False        threshold only
                  True         raise the threshold by 3% and invert the result
                  other truthy (callers pass 2) invert the result but do NOT
                               add the 3%, because "notFG == True" is false

      When img is the global mask the copy is thresholded at (1.0 - thresh),
      scaled, and thresholded again after scaling.  Any other image is only
      scaled.
      """
      global mask
      global thresh

      # always round up, djvulibre tools do too.  Divide by float(factor):
      # when factor is an int, Python 2's "/" truncates before math.ceil
      # ever sees the value, which rounds the size DOWN instead.
      width = int (math.ceil (img.width / float(factor)))
      height = int (math.ceil (img.height / float(factor)))

      pdb.gimp_edit_copy (img.active_drawable)
      new = pdb.gimp_edit_paste_as_new ()
      new.disable_undo ()
#     disp = gimp.Display (new)

      if img == mask:             # we're scaling the mask itself, do some processing
          frac = 1.0 - thresh     # we use the opposite threshold here
          # Deliberately "== True" and not plain truthiness: see notFG above.
          if notFG == True:
              frac += 0.03        # add 3% more to the background mask

          pdb.gimp_threshold (new.active_drawable, linear_fraction (frac), 255)

          if factor != 1:
              pdb.gimp_image_scale_full (new, width, height, INTERPOLATION_LANCZOS)
              # Scaling reintroduces gray levels; threshold again.
              pdb.gimp_threshold (new.active_drawable, linear_fraction (1.0 - (1.0 / float(factor))), 255)

          if notFG:               # invert the mask if it's not the foreground
              pdb.gimp_invert (new.active_drawable)
      else:
          if factor != 1:
              pdb.gimp_image_scale_full (new, width, height, INTERPOLATION_LANCZOS)

      save (new, name)

#     del disp
      del new

def save_scaled_files (name, dpi):
      """Write the five intermediate files for one page.

      name -- prefix for the output files; ".mask", ".fgmask", ".bgmask",
              ".fgppm" and ".bgppm" are appended to it
      dpi  -- resolution of the source image

      The foreground is reduced towards 100 dpi.  The background is reduced
      towards 50 dpi, or 100 dpi when the global hq is set.
      """
      global mask, fg, bg, hq

      fg_target_dpi = 100
      if hq:
          bg_target_dpi = 100
      else:
          bg_target_dpi = 50

      bg_scale = scale_factor (dpi, bg_target_dpi)
      fg_scale = scale_factor (dpi, fg_target_dpi)

      # The three masks, in order: (suffix, reduction factor, notFG flag).
      mask_outputs = (
          (".mask", 1, 2),
          (".fgmask", fg_scale, False),
          (".bgmask", bg_scale, True),
      )
      for suffix, factor, not_fg in mask_outputs:
          scale_and_save_cmyk (name + suffix, mask, factor, not_fg)

      # Then the two colour layers.
      scale_and_save_cmyk (name + ".fgppm", fg, fg_scale, False)
      scale_and_save_cmyk (name + ".bgppm", bg, bg_scale, True)

def convert_to_cmyk_mask (pathglob, dpi=300, fine=False):
      """Plug-in entry point: build DjVu mask and layer files for each image.

      pathglob -- glob pattern; every file it matches is processed in turn
      dpi      -- resolution of the input images, passed to save_scaled_files ()
      fine     -- stored in the global hq, which save_scaled_files () reads

      For each file the image is loaded as the foreground (global fg) and
      decomposed into CMYK.  A mask (global mask) is derived from the black
      channel, part of the black channel is removed using that mask, and the
      channels are recomposed into the background (global bg).  The results
      are written by save_scaled_files () using the input file name as the
      prefix.  GIMP is told to quit once all files have been processed.
      """
      global mask, fg, bg
      global thresh
      global hq

      hq = fine

      # This is the fraction of black we want to pull out of the background and put into the foreground.
      # We express this as the denominator of a fraction, where the numerator is one less. That is,
      # max_frac(4) == 3/4 or .75 and max_frac(32) == 31/32 or 0.96875
      thresh = max_frac (3)           # Use this for "normal"
#     thresh = max_frac (2)           # to get light gray OUT of the foreground
#     thresh = max_frac (4)           # More darkness, fine lines
#     thresh = max_frac (5)           # usually gets "dark cyan" into the foreground
#     thresh = max_frac (6)           # Front page of last Rainbow
#     thresh = max_frac (7)
#     thresh = max_frac (8)
#     thresh = max_frac (9)           # This works _OK_ for the tabloids, mostly
      # Custom values for tweaking
#     thresh = 0.55
#     thresh = 1.0/4.0

      flist = glob.glob (pathglob)
      for curr in flist:
          # The loaded image itself serves as the foreground layer.
          fg = pdb.gimp_file_load (curr, curr)
          fg.disable_undo ()
#         fgd = gimp.Display (fg)

          # cropping, don't leave this in
#         fg.crop (fg.width - 150, fg.height - 72, 0, 0)

          # Decompose to colors + black
          (c, m, y, k) = pdb.plug_in_decompose (fg, fg.active_drawable, 'CMYK', False)

          k.disable_undo ()
#         kd = gimp.Display (k)

          # The mask starts out as a copy of the black channel.
          pdb.gimp_edit_copy (k.active_drawable)
          mask = pdb.gimp_edit_paste_as_new ()

          mask.disable_undo ()    # save some memory
#         maskd = gimp.Display (mask)

          # Smooth out the "bumps" and then fix the contrast
          pdb.gimp_invert (mask.active_drawable)
          pdb.plug_in_sel_gauss (mask, mask.active_drawable, 7.0, 48)
          pdb.gimp_levels_stretch (mask.active_drawable)

          # Use the mask to remove SOME of the black from the black channel,
          # then reassemble it into the background.
          layer = k.active_layer
          lmask = layer.create_mask (ADD_WHITE_MASK)
          layer.add_mask (lmask)
          pdb.gimp_edit_copy (mask.active_drawable)
          pdb.gimp_floating_sel_anchor (pdb.gimp_edit_paste (lmask, True))
          pdb.gimp_threshold (lmask, linear_fraction (thresh), 255)

          # We want the background color to be black temporarily
          pdb.gimp_context_swap_colors ()
          pdb.gimp_image_flatten (k)
          pdb.gimp_context_swap_colors ()   # swap back to the original colors

          bg = pdb.plug_in_compose (c, c.active_drawable, m, y, k, "CMYK")
#         bgd = gimp.Display (bg)

          del (c, m, y, k)
#         del (kd)

          # Save and clean up
          # (the mask was inverted above; invert it again before saving)
          pdb.gimp_invert (mask.active_drawable)
          save_scaled_files (curr, dpi)

          del (fg, bg, mask)
#         del (fgd, bgd, maskd)

      # Runs once, after every matched file has been processed.
      pdb.gimp_quit (1)

# Register the plug-in with GIMP.  The shrinkpdf2 script calls it in batch
# mode as "python-fu-maskconvert-cmyk".
# NOTE(review): the per-argument labels below follow the positional order of
# gimpfu's register() -- confirm against the gimpfu documentation.
register (
      'maskconvert-cmyk',         # procedure name
      'Convert an RGB image to set of images and masks for DjVu conversion (CMYK version)',  # blurb
      'Convert an RGB image to set of images and masks for DjVu conversion (CMYK version)',  # help
      'Jeff Teunissen',           # author
      'Jeff Teunissen',           # copyright
      '2009',                     # date
      'CMYK Dj_Vu Mask',          # menu label
      None,                       # image types
      [
          # (type, name, description, default) -- these map, in order, onto
          # the parameters of convert_to_cmyk_mask
          (PF_STRING, 'pathglob', 'Shell-style glob mask', '*.jpg'),
          (PF_INT32, 'dpi', 'Starting DPI', 300),
          (PF_INT32, 'fine', 'Use fine quality', 0)
      ],
      [],                         # no return values
      convert_to_cmyk_mask,       # function that implements the procedure
      menu='<Image>/File'
      )

# Hand control to gimpfu.
main ()



More information about the Coco mailing list