mirror of
https://github.com/Ponce/slackbuilds
synced 2024-11-21 19:42:24 +01:00
libraries/libexttextcat: Added (Text Categorization library).
Signed-off-by: Willy Sudiarto Raharjo <willysr@slackbuilds.org>
This commit is contained in:
parent
4c0a36c790
commit
004c61c7bd
4 changed files with 142 additions and 0 deletions
20
libraries/libexttextcat/README
Normal file
20
libraries/libexttextcat/README
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
Libtextcat is a library with functions that implement the
|
||||||
|
classification technique described in Cavnar & Trenkle, "N-Gram-Based
|
||||||
|
Text Categorization". It was primarily developed for language
|
||||||
|
guessing, a task on which it is known to perform with near-perfect
|
||||||
|
accuracy.
|
||||||
|
|
||||||
|
The central idea of the Cavnar & Trenkle technique is to calculate a
|
||||||
|
"fingerprint" of a document with an unknown category, and compare this
|
||||||
|
with the fingerprints of a number of documents of which the categories
|
||||||
|
are known. The categories of the closest matches are output as the
|
||||||
|
classification. A fingerprint is a list of the most frequent n-grams
|
||||||
|
occurring in a document, ordered by frequency. Fingerprints are
|
||||||
|
compared with a simple out-of-place metric. See the article for more
|
||||||
|
details.
|
||||||
|
|
||||||
|
Considerable effort went into making this implementation fast and
|
||||||
|
efficient. The language guesser processes over 100 documents/second on
|
||||||
|
a simple PC, which makes it practical for many uses. It was developed
|
||||||
|
for use in our webcrawler and search engine software, in which it
|
||||||
|
handles millions of documents a day.
|
93
libraries/libexttextcat/libexttextcat.SlackBuild
Normal file
93
libraries/libexttextcat/libexttextcat.SlackBuild
Normal file
|
@ -0,0 +1,93 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
# Slackware build script for libexttextcat
|
||||||
|
|
||||||
|
# Copyright 2015 Hunter Sezen California, USA
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# Redistribution and use of this script, with or without modification, is
|
||||||
|
# permitted provided that the following conditions are met:
|
||||||
|
#
|
||||||
|
# 1. Redistributions of this script must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
|
||||||
|
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||||
|
# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||||
|
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||||
|
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||||
|
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||||
|
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
# Package identity. VERSION, BUILD and TAG may be overridden from the
# environment; the values below are the defaults used otherwise.
PRGNAM=libexttextcat
VERSION=${VERSION:-3.4.4}
BUILD=${BUILD:-1}
TAG=${TAG:-_SBo}
|
||||||
|
|
||||||
|
# Derive ARCH from the running machine unless the caller already set it.
# Any i?86 machine is packaged as i486 and any arm* machine as arm; every
# other machine name is used unchanged.
if [ -z "$ARCH" ]; then
  ARCH=$(uname -m)
  case "$ARCH" in
    i?86) ARCH=i486 ;;
    arm*) ARCH=arm ;;
  esac
fi
|
||||||
|
|
||||||
|
# Working locations:
#   CWD    - directory the script was started from; the source tarball,
#            slack-desc and this script are read from here
#   TMP    - scratch area for unpacking and building (overridable)
#   PKG    - staging root that receives "make install"
#   OUTPUT - directory the finished package is written to (overridable)
CWD=$(pwd)
TMP=${TMP:-/tmp/SBo}
PKG=$TMP/package-$PRGNAM
OUTPUT=${OUTPUT:-/tmp}
|
||||||
|
|
||||||
|
# Choose compiler flags and the library directory suffix for the target
# architecture. Only x86_64 gets a "64" suffix (libraries go to /usr/lib64);
# unrecognised architectures fall back to plain -O2.
case "$ARCH" in
  i486)
    SLKCFLAGS="-O2 -march=i486 -mtune=i686"
    LIBDIRSUFFIX=""
    ;;
  i686)
    SLKCFLAGS="-O2 -march=i686 -mtune=i686"
    LIBDIRSUFFIX=""
    ;;
  x86_64)
    SLKCFLAGS="-O2 -fPIC"
    LIBDIRSUFFIX="64"
    ;;
  *)
    SLKCFLAGS="-O2"
    LIBDIRSUFFIX=""
    ;;
esac
|
||||||
|
|
||||||
|
# From here on, stop at the first command that fails.
set -e

# Start from an empty staging tree and a freshly unpacked source tree.
# Path expansions are quoted so that TMP, OUTPUT or CWD values containing
# whitespace do not get split into several arguments.
rm -rf "$PKG"
mkdir -p "$TMP" "$PKG" "$OUTPUT"
cd "$TMP"
rm -rf "$PRGNAM-$VERSION"
tar xvf "$CWD/$PRGNAM-$VERSION.tar.xz"
cd "$PRGNAM-$VERSION"

# Normalise ownership and permissions of the unpacked sources: the listed
# executable-style modes become 755, the listed plain-file modes become 644.
chown -R root:root .
find -L . \
  \( -perm 777 -o -perm 775 -o -perm 750 -o -perm 711 -o -perm 555 \
  -o -perm 511 \) -exec chmod 755 {} \; -o \
  \( -perm 666 -o -perm 664 -o -perm 640 -o -perm 600 -o -perm 444 \
  -o -perm 440 -o -perm 400 \) -exec chmod 644 {} \;

# Configure with the architecture-specific flags and library directory.
CFLAGS="$SLKCFLAGS" \
CXXFLAGS="$SLKCFLAGS" \
./configure \
  --prefix=/usr \
  --libdir="/usr/lib${LIBDIRSUFFIX}" \
  --build="$ARCH-slackware-linux"

# Build, then install into the staging tree instead of the live system.
make
make install DESTDIR="$PKG"

# Strip ELF executables and shared objects in the staging tree. This is
# deliberately best-effort: stderr is discarded and a failure anywhere in
# the pipeline is ignored (|| true) so that it cannot abort the build.
find "$PKG" -print0 | xargs -0 file | grep -e "executable" -e "shared object" | grep ELF \
  | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true

# Ship the upstream documentation together with a copy of this build script.
mkdir -p "$PKG/usr/doc/$PRGNAM-$VERSION"
cp -a ChangeLog LICENSE READM* TODO "$PKG/usr/doc/$PRGNAM-$VERSION"
cat "$CWD/$PRGNAM.SlackBuild" > "$PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild"

# Package description file.
mkdir -p "$PKG/install"
cat "$CWD/slack-desc" > "$PKG/install/slack-desc"

# Create the package archive in OUTPUT; PKGTYPE selects the archive
# extension and defaults to tgz.
cd "$PKG"
/sbin/makepkg -l y -c n "$OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.${PKGTYPE:-tgz}"
|
10
libraries/libexttextcat/libexttextcat.info
Normal file
10
libraries/libexttextcat/libexttextcat.info
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
PRGNAM="libexttextcat"
|
||||||
|
VERSION="3.4.4"
|
||||||
|
HOMEPAGE="https://wiki.freedesktop.org/www/Software/libexttextcat/"
|
||||||
|
DOWNLOAD="http://dev-www.libreoffice.org/src/libexttextcat/libexttextcat-3.4.4.tar.xz"
|
||||||
|
MD5SUM="bfa7107c27afda3a3afa4b7ab5a3fe17"
|
||||||
|
DOWNLOAD_x86_64=""
|
||||||
|
MD5SUM_x86_64=""
|
||||||
|
REQUIRES=""
|
||||||
|
MAINTAINER="Hunter Sezen"
|
||||||
|
EMAIL="ovariegata@yahoo.com"
|
19
libraries/libexttextcat/slack-desc
Normal file
19
libraries/libexttextcat/slack-desc
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
# HOW TO EDIT THIS FILE:
|
||||||
|
# The "handy ruler" below makes it easier to edit a package description.
|
||||||
|
# Line up the first '|' above the ':' following the base package name, and
|
||||||
|
# the '|' on the right side marks the last column you can put a character in.
|
||||||
|
# You must make exactly 11 lines for the formatting to be correct. It's also
|
||||||
|
# customary to leave one space after the ':' except on otherwise blank lines.
|
||||||
|
|
||||||
|
|-----handy-ruler------------------------------------------------------|
|
||||||
|
libexttextcat: libexttextcat (N-Gram-Based Text Categorization library)
|
||||||
|
libexttextcat:
|
||||||
|
libexttextcat: Libtextcat is a library with functions that implement the
|
||||||
|
libexttextcat: classification technique described in Cavnar & Trenkle, "N-Gram-Based
|
||||||
|
libexttextcat: Text Categorization". It was primarily developed for language
|
||||||
|
libexttextcat: guessing, a task on which it is known to perform with near-perfect
|
||||||
|
libexttextcat: accuracy.
|
||||||
|
libexttextcat:
|
||||||
|
libexttextcat: Homepage: https://wiki.freedesktop.org/www/Software/libexttextcat/
|
||||||
|
libexttextcat:
|
||||||
|
libexttextcat:
|
Loading…
Reference in a new issue