tesseract-3.04.01.ebuild 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  # Copyright 1999-2017 Gentoo Foundation
  # Distributed under the terms of the GNU General Public License v2
  # EAPI has to be assigned before anything else in the ebuild.
  EAPI=5

  # Name of the upstream GitHub organisation; it differs from ${PN}.
  MY_PN="tesseract-ocr"
  # Tag of the upstream tessdata repository the language packs are fetched from.
  LANGPACKV="3.04.00"
  # Common prefix of every *.traineddata download listed in SRC_URI.
  URI_PREFIX="https://github.com/${MY_PN}/tessdata/raw/${LANGPACKV}/"

  # Consumed by java-pkg-opt-2 (inherited right after this): the USE flag that
  # switches the optional Java parts on. Must be set before the inherit line.
  JAVA_PKG_OPT_USE="scrollview"
  inherit eutils autotools java-pkg-opt-2

  # Short package summary; kept without a trailing period as Gentoo QA expects.
  DESCRIPTION="An OCR Engine, originally developed at HP, now open source"
  HOMEPAGE="https://github.com/tesseract-ocr"
  # Distfiles: the source tarball, the always-installed English language pack,
  # the optional HTML documentation (an older 3.02.02 tarball) and one or more
  # *.traineddata files per enabled l10n_*/math/osd USE flag.
  # Every language pack is renamed via "->" so the stored distfile name carries
  # the language pack version (${LANGPACKV}); src_unpack strips it again.
  SRC_URI="https://github.com/${MY_PN}/${PN}/archive/${PV}.tar.gz -> ${P}.tar.gz
      ${URI_PREFIX}eng.traineddata -> eng.traineddata-${LANGPACKV}
      doc? ( https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.02-doc-html.tar.gz )
      math? ( ${URI_PREFIX}equ.traineddata -> equ.traineddata-${LANGPACKV} )
      l10n_ar? ( ${URI_PREFIX}ara.traineddata -> ara.traineddata-${LANGPACKV} )
      l10n_bg? ( ${URI_PREFIX}bul.traineddata -> bul.traineddata-${LANGPACKV} )
      l10n_ca? ( ${URI_PREFIX}cat.traineddata -> cat.traineddata-${LANGPACKV} )
      l10n_chr? ( ${URI_PREFIX}chr.traineddata -> chr.traineddata-${LANGPACKV} )
      l10n_cs? ( ${URI_PREFIX}ces.traineddata -> ces.traineddata-${LANGPACKV} )
      l10n_de? ( ${URI_PREFIX}deu.traineddata -> deu.traineddata-${LANGPACKV}
          ${URI_PREFIX}deu_frak.traineddata -> deu_frak.traineddata-${LANGPACKV} )
      l10n_da? ( ${URI_PREFIX}dan.traineddata -> dan.traineddata-${LANGPACKV}
          ${URI_PREFIX}dan_frak.traineddata -> dan_frak.traineddata-${LANGPACKV} )
      l10n_el? ( ${URI_PREFIX}ell.traineddata -> ell.traineddata-${LANGPACKV} )
      l10n_es? ( ${URI_PREFIX}spa.traineddata -> spa.traineddata-${LANGPACKV} )
      l10n_fi? ( ${URI_PREFIX}fin.traineddata -> fin.traineddata-${LANGPACKV} )
      l10n_fr? ( ${URI_PREFIX}fra.traineddata -> fra.traineddata-${LANGPACKV} )
      l10n_he? ( ${URI_PREFIX}heb.traineddata -> heb.traineddata-${LANGPACKV} )
      l10n_hi? ( ${URI_PREFIX}hin.traineddata -> hin.traineddata-${LANGPACKV} )
      l10n_hu? ( ${URI_PREFIX}hun.traineddata -> hun.traineddata-${LANGPACKV} )
      l10n_id? ( ${URI_PREFIX}ind.traineddata -> ind.traineddata-${LANGPACKV} )
      l10n_it? ( ${URI_PREFIX}ita.traineddata -> ita.traineddata-${LANGPACKV} )
      l10n_ja? ( ${URI_PREFIX}jpn.traineddata -> jpn.traineddata-${LANGPACKV} )
      l10n_ko? ( ${URI_PREFIX}kor.traineddata -> kor.traineddata-${LANGPACKV} )
      l10n_lt? ( ${URI_PREFIX}lit.traineddata -> lit.traineddata-${LANGPACKV} )
      l10n_lv? ( ${URI_PREFIX}lav.traineddata -> lav.traineddata-${LANGPACKV} )
      l10n_nl? ( ${URI_PREFIX}nld.traineddata -> nld.traineddata-${LANGPACKV} )
      l10n_no? ( ${URI_PREFIX}nor.traineddata -> nor.traineddata-${LANGPACKV} )
      l10n_pl? ( ${URI_PREFIX}pol.traineddata -> pol.traineddata-${LANGPACKV} )
      l10n_pt? ( ${URI_PREFIX}por.traineddata -> por.traineddata-${LANGPACKV} )
      l10n_ro? ( ${URI_PREFIX}ron.traineddata -> ron.traineddata-${LANGPACKV} )
      l10n_ru? ( ${URI_PREFIX}rus.traineddata -> rus.traineddata-${LANGPACKV} )
      l10n_sk? ( ${URI_PREFIX}slk.traineddata -> slk.traineddata-${LANGPACKV}
          ${URI_PREFIX}slk_frak.traineddata -> slk_frak.traineddata-${LANGPACKV} )
      l10n_sl? ( ${URI_PREFIX}slv.traineddata -> slv.traineddata-${LANGPACKV} )
      l10n_sr? ( ${URI_PREFIX}srp.traineddata -> srp.traineddata-${LANGPACKV} )
      l10n_sv? ( ${URI_PREFIX}swe.traineddata -> swe.traineddata-${LANGPACKV} )
      l10n_th? ( ${URI_PREFIX}tha.traineddata -> tha.traineddata-${LANGPACKV} )
      l10n_tl? ( ${URI_PREFIX}tgl.traineddata -> tgl.traineddata-${LANGPACKV} )
      l10n_tr? ( ${URI_PREFIX}tur.traineddata -> tur.traineddata-${LANGPACKV} )
      l10n_uk? ( ${URI_PREFIX}ukr.traineddata -> ukr.traineddata-${LANGPACKV} )
      l10n_vi? ( ${URI_PREFIX}vie.traineddata -> vie.traineddata-${LANGPACKV} )
      l10n_zh-CN? ( ${URI_PREFIX}chi_sim.traineddata -> chi_sim.traineddata-${LANGPACKV} )
      l10n_zh-TW? ( ${URI_PREFIX}chi_tra.traineddata -> chi_tra.traineddata-${LANGPACKV} )
      osd? ( ${URI_PREFIX}osd.traineddata -> osd.traineddata-${LANGPACKV} )
  "
  LICENSE="Apache-2.0"
  SLOT="0"
  KEYWORDS="alpha amd64 ~arm ~mips ppc ppc64 sparc x86"
  # Feature flags first, then one l10n_* flag per language pack in SRC_URI.
  # scrollview is enabled by default (+), webp is explicitly defaulted off (-).
  IUSE="doc examples jpeg math opencl osd png +scrollview static-libs tiff training -webp
      l10n_ar l10n_bg l10n_ca l10n_chr l10n_cs l10n_de l10n_da l10n_el l10n_es
      l10n_fi l10n_fr l10n_he l10n_hi l10n_hu l10n_id l10n_it l10n_ja l10n_ko
      l10n_lt l10n_lv l10n_nl l10n_no l10n_pl l10n_pt l10n_ro l10n_ru l10n_sk
      l10n_sl l10n_sr l10n_sv l10n_th l10n_tl l10n_tr l10n_uk l10n_vi
      l10n_zh-CN l10n_zh-TW"
  # Dependencies shared by build time and run time.
  # With opencl, USE=tiff is necessary in leptonica.
  CDEPEND=">=media-libs/leptonica-1.71:=[zlib,tiff?,jpeg?,png?,webp?]
      opencl? (
          virtual/opencl
          media-libs/tiff:0=
          media-libs/leptonica:=[tiff]
      )
      scrollview? (
          >=dev-java/piccolo2d-3.0:0
      )
      training? (
          dev-libs/icu:=
          x11-libs/pango:=
          x11-libs/cairo:=
      )
  "
  # Building ScrollView needs a JDK; running it only needs a JRE.
  DEPEND="${CDEPEND}
      scrollview? ( >=virtual/jdk-1.7 )"
  RDEPEND="${CDEPEND}
      scrollview? ( >=virtual/jre-1.7 )"
  # Documentation files from the source tree to install.
  DOCS=( AUTHORS ChangeLog NEWS README.md ReleaseNotes )

  # Patches handed to epatch in src_prepare, in this order.
  PATCHES=(
      "${FILESDIR}/tesseract-2.04-gcc47.patch"
      "${FILESDIR}/${P}-use-system-piccolo2d.patch"
      "${FILESDIR}/${P}-fix-opencl-ldflags.patch"
  )
  # Unpack the source tarball (plus the HTML documentation when USE=doc) and
  # copy every fetched language pack into the source tree's tessdata/ directory.
  src_unpack() {
      # Keep the loop variable out of the global ebuild environment.
      local distfile

      unpack ${P}.tar.gz
      use doc && unpack tesseract-ocr-3.02.02-doc-html.tar.gz

      # Language packs sit in DISTDIR as "<name>.traineddata-<version>" (see the
      # "->" renames in SRC_URI); "${distfile%-*}" drops the trailing
      # "-<version>" so the copy gets its plain "<name>.traineddata" name back.
      for distfile in ${A}; do
          if [[ "${distfile}" == *traineddata* ]]; then
              cp "${DISTDIR}/${distfile}" "${S}/tessdata/${distfile%-*}" || die
          fi
      done
  }
  # Apply the patches listed in PATCHES, regenerate the autotools build system,
  # then run the java-pkg-opt-2 eclass's own prepare step.
  src_prepare() {
      epatch "${PATCHES[@]}"
      eautoreconf
      java-pkg-opt-2_src_prepare
  }
  # Run configure with shared libraries always on and the remaining switches
  # derived from USE flags.
  src_configure() {
      local myeconfargs=(
          --enable-shared
          $(use_enable opencl)
          # USE=scrollview maps to configure's "graphics" switch.
          $(use_enable scrollview graphics)
          $(use_enable static-libs static)
      )

      econf "${myeconfargs[@]}"
  }
  # Run the default build, then the optional ScrollView jar and training tools.
  src_compile() {
      default

      # Explicit if-blocks instead of "use X && cmd": a trailing "use X && cmd"
      # would make the function return non-zero whenever the flag is disabled.
      if use scrollview; then
          emake ScrollView.jar JAVAC="javac $(java-pkg_javac-args)"
      fi

      if use training; then
          emake training
      fi
  }
  # Install the package, then the optional training tools, example images and
  # HTML documentation, and finally the language data (plus ScrollView.jar).
  src_install() {
      default
      # eutils helper; run right after the default install step.
      prune_libtool_files

      if use training; then
          emake DESTDIR="${D}" training-install
      fi

      if use examples; then
          insinto /usr/share/doc/${PF}/examples
          doins testing/eurotext.tif testing/phototest.tif
      fi

      if use doc; then
          # The doc tarball was unpacked into ${WORKDIR}/${MY_PN} by src_unpack.
          dohtml -r "${WORKDIR}/${MY_PN}"/doc/html/*
      fi

      insinto /usr/share/tessdata
      doins tessdata/*traineddata* # language files

      # Explicit if-block so the function does not end on a failed "use" test
      # (and thus a non-zero status) when scrollview is disabled.
      if use scrollview; then
          doins java/ScrollView.jar # scrollview
      fi
  }