mirror of
https://github.com/Ponce/slackbuilds
synced 2024-11-04 20:29:09 +01:00
python/python-pdfminer: Added (PDF parser and analyzer).
Signed-off-by: Willy Sudiarto Raharjo <willysr@slackbuilds.org>
This commit is contained in:
parent
6a250f182e
commit
8ee80adc21
4 changed files with 151 additions and 0 deletions
23
python/python-pdfminer/README
Normal file
23
python/python-pdfminer/README
Normal file
|
@ -0,0 +1,23 @@
|
|||
PDFMiner is a tool for extracting information from PDF documents. Unlike
|
||||
other PDF-related tools, it focuses entirely on getting and analyzing
|
||||
text data. PDFMiner allows one to obtain the exact location of text in a
|
||||
page, as well as other information such as fonts or lines. It includes a
|
||||
PDF converter that can transform PDF files into other text formats (such
|
||||
as HTML). It has an extensible PDF parser that can be used for purposes
|
||||
other than text analysis.
|
||||
|
||||
PDFMiner comes with two handy tools: pdf2txt.py and dumppdf.py.
|
||||
|
||||
pdf2txt.py
|
||||
|
||||
pdf2txt.py extracts text contents from a PDF file. It cannot recognize
|
||||
text drawn as images. It also extracts locations, font names/sizes,
|
||||
and writing direction. It requires a password for password-protected PDF
|
||||
documents. You cannot extract any text from a PDF document which does
|
||||
not have extraction permission.
|
||||
|
||||
dumppdf.py
|
||||
|
||||
dumppdf.py dumps the internal contents of a PDF file in pseudo-XML
|
||||
format. This program is primarily for debugging purposes, but it's also
|
||||
possible to extract some meaningful contents (e.g. images).
|
99
python/python-pdfminer/python-pdfminer.SlackBuild
Normal file
99
python/python-pdfminer/python-pdfminer.SlackBuild
Normal file
|
@ -0,0 +1,99 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Slackware build script for python-pdfminer
|
||||
|
||||
# Copyright 2015 Brenton Earl <brent@exitstatusone.com>
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use of this script, with or without modification, is
|
||||
# permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of this script must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
|
||||
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
# Package identity. PRGNAM names the resulting package; SRCNAM is the prefix
# of the source tarball and of the directory it unpacks to.
# VERSION, BUILD and TAG take their value from the environment when set and
# non-empty, otherwise the defaults given here.
PRGNAM=python-pdfminer
|
||||
SRCNAM=pdfminer
|
||||
VERSION=${VERSION:-20140328}
|
||||
BUILD=${BUILD:-1}
|
||||
TAG=${TAG:-_SBo}
|
||||
|
||||
# Pick ARCH automatically only when the caller has not set it.
# Any i?86 machine is mapped to i486, any arm* machine to arm; every other
# machine name reported by uname -m is used unchanged.
if [ -z "$ARCH" ]; then
|
||||
case "$( uname -m )" in
|
||||
i?86) ARCH=i486 ;;
|
||||
arm*) ARCH=arm ;;
|
||||
*) ARCH=$( uname -m ) ;;
|
||||
esac
|
||||
fi
|
||||
|
||||
# Working locations:
#   CWD    - directory the script was started from (source tarball, README,
#            slack-desc and this script are read from here)
#   TMP    - build area, overridable from the environment
#   PKG    - staging tree under TMP that becomes the package contents
#   OUTPUT - where the finished package is written, overridable
CWD=$(pwd)
|
||||
TMP=${TMP:-/tmp/SBo}
|
||||
PKG=$TMP/package-$PRGNAM
|
||||
OUTPUT=${OUTPUT:-/tmp}
|
||||
|
||||
# Choose compiler flags and the lib directory suffix for the target ARCH.
# NOTE(review): neither SLKCFLAGS nor LIBDIRSUFFIX is referenced again in the
# visible part of this script, so these assignments currently have no effect
# on the build.
if [ "$ARCH" = "i486" ]; then
|
||||
SLKCFLAGS="-O2 -march=i486 -mtune=i686"
|
||||
LIBDIRSUFFIX=""
|
||||
elif [ "$ARCH" = "i686" ]; then
|
||||
SLKCFLAGS="-O2 -march=i686 -mtune=i686"
|
||||
LIBDIRSUFFIX=""
|
||||
elif [ "$ARCH" = "x86_64" ]; then
|
||||
SLKCFLAGS="-O2 -fPIC"
|
||||
LIBDIRSUFFIX="64"
|
||||
else
|
||||
SLKCFLAGS="-O2"
|
||||
LIBDIRSUFFIX=""
|
||||
fi
|
||||
|
||||
# From here on, stop at the first command that exits non-zero.
set -e
|
||||
|
||||
# Start from an empty staging tree and a freshly unpacked copy of the sources.
rm -rf $PKG
|
||||
mkdir -p $TMP $PKG $OUTPUT
|
||||
cd $TMP
|
||||
rm -rf $SRCNAM-$VERSION
|
||||
tar xvf $CWD/$SRCNAM-$VERSION.tar.gz
|
||||
cd $SRCNAM-$VERSION
|
||||
# Make everything root-owned, then normalise permissions: entries whose mode
# is exactly one of the first group become 755, entries whose mode is exactly
# one of the second group become 644. -L makes find follow symbolic links.
chown -R root:root .
|
||||
find -L . \
|
||||
\( -perm 777 -o -perm 775 -o -perm 750 -o -perm 711 -o -perm 555 \
|
||||
-o -perm 511 \) -exec chmod 755 {} \; -o \
|
||||
\( -perm 666 -o -perm 664 -o -perm 640 -o -perm 600 -o -perm 444 \
|
||||
-o -perm 440 -o -perm 400 \) -exec chmod 644 {} \;
|
||||
|
||||
# Enables the ability to process Chinese, Japanese and Korean languages
|
||||
make cmap # Comment out this line to disable this support
|
||||
|
||||
# Build / Install
# --root makes setup.py install into the staging tree $PKG instead of the
# running system.
|
||||
python setup.py install --root=$PKG
|
||||
|
||||
# Strip unneeded symbols from every ELF executable or shared object found in
# the staging tree. strip's error output is discarded, and "|| true" keeps a
# non-zero exit from this pipeline from aborting the script under set -e.
find $PKG -print0 | xargs -0 file | grep -e "executable" -e "shared object" | grep ELF \
|
||||
| cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true
|
||||
|
||||
# Install documentation under /usr/doc/$PRGNAM-$VERSION inside the staging
# tree: PKG-INFO, the samples/ directory, the upstream docs/ directory (copied
# to html_docs), plus this package's README and a copy of this build script.
mkdir -p $PKG/usr/doc/$PRGNAM-$VERSION
|
||||
cp -a \
|
||||
PKG-INFO \
|
||||
$PKG/usr/doc/$PRGNAM-$VERSION
|
||||
cp -R \
|
||||
samples/ \
|
||||
$PKG/usr/doc/$PRGNAM-$VERSION
|
||||
cp -R \
|
||||
docs/ \
|
||||
$PKG/usr/doc/$PRGNAM-$VERSION/html_docs
|
||||
cat $CWD/README > $PKG/usr/doc/$PRGNAM-$VERSION/README
|
||||
cat $CWD/$PRGNAM.SlackBuild > $PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild
|
||||
|
||||
# Add the package description, then build the package from the staging tree.
mkdir -p $PKG/install
|
||||
cat $CWD/slack-desc > $PKG/install/slack-desc
|
||||
|
||||
# makepkg runs from inside $PKG so the archive paths are relative to it.
# -l y moves symlinks into install/doinst.sh; -c n leaves the ownership and
# permissions exactly as staged. The archive type defaults to tgz unless
# PKGTYPE is set.
cd $PKG
|
||||
/sbin/makepkg -l y -c n $OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.${PKGTYPE:-tgz}
|
10
python/python-pdfminer/python-pdfminer.info
Normal file
10
python/python-pdfminer/python-pdfminer.info
Normal file
|
@ -0,0 +1,10 @@
|
|||
PRGNAM="python-pdfminer"
|
||||
VERSION="20140328"
|
||||
HOMEPAGE="https://euske.github.io/pdfminer/index.html"
|
||||
DOWNLOAD="https://pypi.python.org/packages/source/p/pdfminer/pdfminer-20140328.tar.gz"
|
||||
MD5SUM="dfe3eb1b7b7017ab514aad6751a7c2ea"
|
||||
DOWNLOAD_x86_64=""
|
||||
MD5SUM_x86_64=""
|
||||
REQUIRES=""
|
||||
MAINTAINER="Brenton Earl"
|
||||
EMAIL="brent@exitstatusone.com"
|
19
python/python-pdfminer/slack-desc
Normal file
19
python/python-pdfminer/slack-desc
Normal file
|
@ -0,0 +1,19 @@
|
|||
# HOW TO EDIT THIS FILE:
|
||||
# The "handy ruler" below makes it easier to edit a package description.
|
||||
# Line up the first '|' above the ':' following the base package name, and
|
||||
# the '|' on the right side marks the last column you can put a character in.
|
||||
# You must make exactly 11 lines for the formatting to be correct. It's also
|
||||
# customary to leave one space after the ':' except on otherwise blank lines.
|
||||
|
||||
               |-----handy-ruler------------------------------------------------------|
|
||||
python-pdfminer: python-pdfminer (PDF parser and analyzer)
|
||||
python-pdfminer:
|
||||
python-pdfminer: PDFMiner is a tool for extracting information from PDF
|
||||
python-pdfminer: documents. It focuses entirely on getting and analyzing text
|
||||
python-pdfminer: data. PDFMiner can obtain the location of text in a page,
|
||||
python-pdfminer: and other information like fonts or lines. It includes a
|
||||
python-pdfminer: PDF converter that can transform PDF files into several
|
||||
python-pdfminer: text formats. It also includes an extensible PDF parser.
|
||||
python-pdfminer:
|
||||
python-pdfminer: Home page: https://euske.github.io/pdfminer/index.html
|
||||
python-pdfminer:
|
Loading…
Reference in a new issue