From 579f4e92e6331d8b23a8ef46c58b1f3667b83f02 Mon Sep 17 00:00:00 2001 From: Disassembler Date: Sat, 7 Oct 2017 19:26:04 +0200 Subject: [PATCH] Add SeedDMS support for office document indexing --- 10-seeddms.sh | 2 +- seeddms/etc/php/7.0/fpm/pool.d/seeddms.conf | 2 ++ seeddms/srv/seeddms/www/conf/settings.xml | 10 ++++++++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/10-seeddms.sh b/10-seeddms.sh index 19a0b24..5c6fedf 100755 --- a/10-seeddms.sh +++ b/10-seeddms.sh @@ -3,7 +3,7 @@ SOURCE_DIR=$(realpath $(dirname "${0}")) # Install dependencies for SeedDMS -apt-get -y --no-install-recommends install catdoc ghostscript gnumeric id3 imagemagick php7.0-gd php7.0-fpm php7.0-mbstring php7.0-pgsql php7.0-xml php-log php-mail php-pear poppler-utils +apt-get -y --no-install-recommends install ghostscript id3 imagemagick libreoffice-calc libreoffice-writer php7.0-gd php7.0-fpm php7.0-mbstring php7.0-pgsql php7.0-xml php-log php-mail php-pear poppler-utils unoconv # Install SeedDMS wget https://sourceforge.net/projects/seeddms/files/seeddms-5.1.3/seeddms-quickstart-5.1.3.tar.gz/download -O /srv/seeddms.tgz diff --git a/seeddms/etc/php/7.0/fpm/pool.d/seeddms.conf b/seeddms/etc/php/7.0/fpm/pool.d/seeddms.conf index 45fcb40..24bb4e8 100644 --- a/seeddms/etc/php/7.0/fpm/pool.d/seeddms.conf +++ b/seeddms/etc/php/7.0/fpm/pool.d/seeddms.conf @@ -11,3 +11,5 @@ pm.max_children = 8 php_admin_value[open_basedir] = /srv/seeddms:/tmp:/tmp:/usr/share/php php_admin_value[upload_max_filesize] = 100M + +env[LANG]=en_US.UTF-8 diff --git a/seeddms/srv/seeddms/www/conf/settings.xml b/seeddms/srv/seeddms/www/conf/settings.xml index 66c4e15..3b38b19 100644 --- a/seeddms/srv/seeddms/www/conf/settings.xml +++ b/seeddms/srv/seeddms/www/conf/settings.xml @@ -24,10 +24,16 @@ pdftotext -enc UTF-8 -nopgbrk %s - | sed -e 's/ [a-zA-Z0-9.]\{1\} / /g' -e 's/[0-9.]//g' - catdoc %s - ssconvert -T Gnumeric_stf:stf_csv -S %s fd://1 + unoconv -d document -f txt --stdout %s + unoconv -d document -f txt --stdout %s + unoconv -d document -f txt --stdout %s + unoconv -d document -f txt --stdout %s + unoconv -d spreadsheet -f csv --stdout %s + unoconv -d spreadsheet -f csv --stdout %s + unoconv -d spreadsheet -f csv --stdout %s id3 -l -R %s | egrep '(Title|Artist|Album)' | sed 's/^[^:]*: //g' id3 -l -R %s | egrep '(Title|Artist|Album)' | sed 's/^[^:]*: //g' + unoconv -d document -f txt --stdout %s cat %s