aboutsummaryrefslogtreecommitdiff
path: root/infra/libkookie/nixpkgs/pkgs/applications/misc/k2pdfopt/default.nix
blob: 75e467d4cdf8af962a74f7cd1f32eb189d8957f6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# Package function arguments.
#
# Each enable* flag decides whether the library named next to it is added to
# buildInputs further down; the flag defaults are given inline.
# NOTE(review): fetchpatch is accepted here but is not referenced anywhere in
# the expression as shown -- confirm whether it can be dropped.
{ stdenv, runCommand, fetchzip, fetchurl, fetchpatch, fetchFromGitHub
, cmake, pkgconfig, zlib, libpng, makeWrapper
# Adds gsl to buildInputs.
, enableGSL ? true, gsl
# Adds ghostscript to buildInputs.
, enableGhostScript ? true, ghostscript
# Adds a copy of mupdf patched with k2pdfopt's mupdf_mod/ sources.
, enableMuPDF ? true, mupdf
# Adds djvulibre to buildInputs.
, enableDJVU ? true, djvulibre
, enableGOCR ? false, gocr # Disabled by default due to crashes
# Adds copies of leptonica and tesseract4 patched with k2pdfopt's
# leptonica_mod/ and tesseract_mod/ sources, and wraps the installed binary
# with a default TESSDATA_PREFIX.
, enableTesseract ? true, leptonica, tesseract4
}:

with stdenv.lib;

# k2pdfopt is a pain to package. It requires modified versions of mupdf,
# leptonica, and tesseract.  Instead of shipping patches for these upstream
# packages, k2pdfopt includes just the modified source files for these
# packages.  The individual files from the {mupdf,leptonica,tesseract}_mod/
# directories are intended to replace the corresponding source files in the
# upstream packages, for a particular version of that upstream package.
#
# There are a few ways we could approach packaging these modified versions of
# mupdf, leptonica, and tesseract:
# 1) Override the upstream source with a new derivation that involves copying
# the modified source files from k2pdfopt and replacing the corresponding
# source files in the upstream packages. Since the files are intended for a
# particular version of the upstream package, this would not allow us to easily
# use updates to those packages in nixpkgs.
# 2) Manually produce patches which can be applied against the upstream
# project, and have the same effect as replacing those files.  This is what I
# believe k2pdfopt should do for us anyway.  The benefit of creating and
# applying patches in this way is that minor updates (esp. security fixes) to
# upstream packages might still allow these patches to apply successfully.
# 3) Automatically produce these patches inside a nix derivation. This is the
# approach taken here, using the "mkPatch" provided below.  This has the
# benefit of easier review and should hopefully be simpler to update in the
# future.

let
  # Create a patch against src based on changes applied in patchCommands.
  #
  #   name          - prefix of the resulting store path ("<name>-k2pdfopt.patch")
  #   src           - source to unpack (via stdenv's unpackPhase) and diff against
  #   patchCommands - shell snippet run from inside a copy of the unpacked
  #                   source; whatever it changes there ends up in the patch
  #
  # The result is a single unified diff (diff -Naur) between the pristine
  # unpacked tree and the modified copy.
  mkPatch = { name, src, patchCommands }: runCommand "${name}-k2pdfopt.patch" { inherit src; } ''
    source $stdenv/setup
    unpackPhase

    orig=$sourceRoot
    new=$sourceRoot-modded
    cp -r $orig/. $new/

    pushd $new >/dev/null
    ${patchCommands}
    popd >/dev/null

    # diff exits 0 when the trees are identical, 1 when they differ and 2 (or
    # more) on trouble.  Only the first two are acceptable here; anything else
    # would leave a truncated or empty patch behind, so fail the build instead
    # of hiding the error.
    status=0
    diff -Naur $orig $new > $out || status=$?
    if [ "$status" -gt 1 ]; then
      echo "mkPatch(${name}): diff failed with exit status $status" >&2
      exit "$status"
    fi
  '';

  # Package name and upstream release; both are spliced into the download URL
  # below and inherited by the derivation.
  pname = "k2pdfopt";
  version = "2.53";

  # Unpacked upstream source archive.  Bound here (outside the derivation) so
  # the *_mod/ directories it ships can also be referenced when generating the
  # patches for the bundled libraries.
  k2pdfopt_src = fetchzip {
    sha256 = "1fna8bg3pascjfc3hmc6xn0xi2yh7f1qp0d344mw9hqanbnykyy8";
    url = "http://www.willus.com/${pname}/src/${pname}_v${version}_src.zip";
  };
in stdenv.mkDerivation rec {
  inherit pname version;
  src = k2pdfopt_src;

  patches = [
    ./0001-Fix-CMakeLists.patch
  ];

  # Include the DjVu API header as <libdjvu/ddjvuapi.h> instead of <djvu.h>.
  postPatch = ''
    substituteInPlace willuslib/bmpdjvu.c \
      --replace "<djvu.h>" "<libdjvu/ddjvuapi.h>"
  '';

  nativeBuildInputs = [ cmake pkgconfig makeWrapper ];

  buildInputs =
  let
    # We use specific versions of these sources below to match the versions
    # used in the k2pdfopt source. Note that this does _not_ need to match the
    # version used elsewhere in nixpkgs, since it is only used to create the
    # patch that can then be applied to the version in nixpkgs.
    mupdf_patch = mkPatch {
      name = "mupdf";
      src = fetchurl {
        url = "https://mupdf.com/downloads/archive/mupdf-1.17.0-source.tar.gz";
        sha256 = "13nl9nrcx2awz9l83mlv2psi1lmn3hdnfwxvwgwiwbxlkjl3zqq0";
      };
      patchCommands = ''
        cp ${k2pdfopt_src}/mupdf_mod/{filter-basic,font,stext-device,string}.c ./source/fitz/
        cp ${k2pdfopt_src}/mupdf_mod/pdf-* ./source/pdf/
      '';
    };
    mupdf_modded = mupdf.overrideAttrs (old: {
      patches = (old.patches or []) ++ [ mupdf_patch ];
      # Append to (rather than replace) whatever postPatch the base mupdf
      # package defines, so its own fix-ups are not silently dropped.  The
      # explicit newline keeps the two snippets on separate shell lines.
      #
      # This function is missing in font.c, see font-win32.c
      postPatch = (old.postPatch or "") + "\n" + ''
        echo "void pdf_install_load_system_font_funcs(fz_context *ctx) {}" >> source/fitz/font.c
      '';
    });

    leptonica_patch = mkPatch {
      name = "leptonica";
      src = fetchurl {
        url = "http://www.leptonica.org/source/leptonica-1.79.0.tar.gz";
        sha256 = "1n004gv1dj3pq1fcnfdclvvx5nang80336aa67nvs3nnqp4ncn84";
      };
      patchCommands = "cp -r ${k2pdfopt_src}/leptonica_mod/. ./src/";
    };
    leptonica_modded = leptonica.overrideAttrs (old: {
      patches = (old.patches or []) ++ [ leptonica_patch ];
    });

    tesseract_patch = mkPatch {
      name = "tesseract";
      src = fetchFromGitHub {
        owner = "tesseract-ocr";
        repo = "tesseract";
        rev = "4.1.1";
        sha256 = "1ca27zbjpx35nxh9fha410z3jskwyj06i5hqiqdc08s2d7kdivwn";
      };
      patchCommands = ''
        cp ${k2pdfopt_src}/tesseract_mod/{baseapi,tesscapi,tesseract}.* src/api/
        cp ${k2pdfopt_src}/tesseract_mod/{tesscapi,tessedit,tesseract}.* src/ccmain/
        cp ${k2pdfopt_src}/tesseract_mod/dotproduct{avx,fma,sse}.* src/arch/
        cp ${k2pdfopt_src}/tesseract_mod/{intsimdmatrixsse,simddetect}.* src/arch/
        cp ${k2pdfopt_src}/tesseract_mod/{errcode,genericvector,mainblk,params,serialis,tessdatamanager,tess_version,tprintf,unicharset}.* src/ccutil/
        cp ${k2pdfopt_src}/tesseract_mod/{input,lstmrecognizer}.* src/lstm/
        cp ${k2pdfopt_src}/tesseract_mod/openclwrapper.* src/opencl/
      '';
    };
    tesseract_modded = tesseract4.override {
      tesseractBase = tesseract4.tesseractBase.overrideAttrs (old: {
        patches = (old.patches or []) ++ [ tesseract_patch ];
        # Additional compilation fixes, appended to any postPatch the base
        # package already defines instead of overwriting it.
        postPatch = (old.postPatch or "") + "\n" + ''
          echo libtesseract_api_la_SOURCES += tesscapi.cpp >> src/api/Makefile.am
          substituteInPlace src/api/tesseract.h \
            --replace "#include <leptonica.h>" "//#include <leptonica.h>"
        '';
      });
    };
  in
    [ zlib libpng ] ++
    optional enableGSL gsl ++
    optional enableGhostScript ghostscript ++
    optional enableMuPDF mupdf_modded ++
    optional enableDJVU djvulibre ++
    optional enableGOCR gocr ++
    optionals enableTesseract [ leptonica_modded tesseract_modded ];

  dontUseCmakeBuildDir = true;

  # Put the include_mod/ directory shipped with the k2pdfopt source on the C
  # include path.
  cmakeFlags = [ "-DCMAKE_C_FLAGS=-I${src}/include_mod" ];

  NIX_LDFLAGS = "-lpthread";

  # Only the k2pdfopt binary is installed.  The pre/post hooks are run
  # explicitly because a custom installPhase replaces the default one that
  # would otherwise call them.
  installPhase = ''
    runHook preInstall
    install -D -m 755 k2pdfopt $out/bin/k2pdfopt
    runHook postInstall
  '';

  # Default TESSDATA_PREFIX to the tessdata directory of tesseract4;
  # --set-default leaves a value already present in the environment untouched.
  preFixup = optionalString enableTesseract ''
    wrapProgram $out/bin/k2pdfopt --set-default TESSDATA_PREFIX ${tesseract4}/share/tessdata
  '';

  meta = with stdenv.lib; {
    description = "Optimizes PDF/DJVU files for mobile e-readers (e.g. the Kindle) and smartphones";
    homepage = "http://www.willus.com/k2pdfopt";
    license = licenses.gpl3;
    platforms = platforms.linux;
    maintainers = with maintainers; [ bosu danielfullmer ];
  };
}