perl/perl-text-unaccent: Added (perl module).

Signed-off-by: Willy Sudiarto Raharjo <willysr@slackbuilds.org>
This commit is contained in:
B. Watson 2015-10-28 23:50:42 +07:00 committed by Willy Sudiarto Raharjo
parent 908d2798bc
commit 94fc0ca4d6
5 changed files with 168 additions and 0 deletions

View file

@ -0,0 +1,6 @@
perl-text-unaccent (perl module to remove accents from a string)
Text::Unaccent is a module that removes accents from a string. unac_string
converts the input string from the specified charset to UTF-16 and calls
unac_string_utf16 to return the unaccented equivalent. The conversion
from and to UTF-16 is done with iconv(1).

View file

@ -0,0 +1,46 @@
diff -Naur Text-Unaccent-1.08/unac.c Text-Unaccent-1.08.patched/unac.c
--- Text-Unaccent-1.08/unac.c 2004-10-17 15:00:36.000000000 -0400
+++ Text-Unaccent-1.08.patched/unac.c 2015-10-26 18:41:33.000000000 -0400
@@ -360,6 +360,12 @@
* 004F LATIN CAPITAL LETTER O
* 0151 LATIN SMALL LETTER O WITH DOUBLE ACUTE
* 006F LATIN SMALL LETTER O
+ * 0152 LATIN CAPITAL LIGATURE OE
+ * 004F LATIN CAPITAL LETTER O
+ * 0045 LATIN CAPITAL LETTER E
+ * 0153 LATIN SMALL LIGATURE OE
+ * 006F LATIN SMALL LETTER O
+ * 0065 LATIN SMALL LETTER E
* 0154 LATIN CAPITAL LETTER R WITH ACUTE
* 0052 LATIN CAPITAL LETTER R
* 0155 LATIN SMALL LETTER R WITH ACUTE
@@ -12933,7 +12939,7 @@
/* 3 */ { 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 },
/* 4 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 },
/* 5 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35 },
-/* 6 */ { 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34 },
+/* 6 */ { 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 },
/* 7 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 },
/* 8 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 },
/* 9 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 },
@@ -13149,7 +13155,7 @@
unsigned short unac_data3[] = { 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0065, 0x0063, 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, 0xFFFF, 0x006E, 0x006F, 0x006F, 0x006F, 0x006F, 0x006F, 0xFFFF, 0x006F, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0xFFFF, 0x0079 };
unsigned short unac_data4[] = { 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064, 0x0044, 0x0064, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067 };
unsigned short unac_data5[] = { 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0048, 0x0068, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0xFFFF, 0x0049, 0x004A, 0x0069, 0x006A, 0x004A, 0x006A, 0x004B, 0x006B, 0xFFFF, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C, 0x00B7 };
-unsigned short unac_data6[] = { 0x006C, 0x00B7, 0x004C, 0x006C, 0x004E, 0x006E, 0x004E, 0x006E, 0x004E, 0x006E, 0x02BC, 0x006E, 0xFFFF, 0xFFFF, 0x004F, 0x006F, 0x004F, 0x006F, 0x004F, 0x006F, 0xFFFF, 0xFFFF, 0x0052, 0x0072, 0x0052, 0x0072, 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073 };
+unsigned short unac_data6[] = { 0x006C, 0x00B7, 0x004C, 0x006C, 0x004E, 0x006E, 0x004E, 0x006E, 0x004E, 0x006E, 0x02BC, 0x006E, 0xFFFF, 0xFFFF, 0x004F, 0x006F, 0x004F, 0x006F, 0x004F, 0x006F, 0x004F, 0x0045, 0x006F, 0x0065, 0x0052, 0x0072, 0x0052, 0x0072, 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073 };
unsigned short unac_data7[] = { 0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0054, 0x0074, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079, 0x0059, 0x005A, 0x007A, 0x005A, 0x007A, 0x005A, 0x007A, 0x0073 };
unsigned short unac_data8[] = { 0x0062, 0x0042, 0x0042, 0x0062, 0xFFFF, 0xFFFF, 0xFFFF, 0x0043, 0x0063, 0xFFFF, 0x0044, 0x0044, 0x0064, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0046, 0x0066, 0x0047, 0xFFFF, 0xFFFF, 0xFFFF, 0x0049, 0x004B, 0x006B, 0x006C, 0xFFFF, 0xFFFF, 0x004E, 0x006E, 0x004F };
unsigned short unac_data9[] = { 0x004F, 0x006F, 0xFFFF, 0xFFFF, 0x0050, 0x0070, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0074, 0x0054, 0x0074, 0x0054, 0x0055, 0x0075, 0xFFFF, 0x0056, 0x0059, 0x0079, 0x005A, 0x007A, 0xFFFF, 0xFFFF, 0xFFFF, 0x0292, 0xFFFF, 0xFFFF, 0xFFFF, 0x0296, 0xFFFF };
@@ -13881,9 +13887,9 @@
*out_lengthp = 0;
} else {
char* utf16 = 0;
- int utf16_length = 0;
+ size_t utf16_length = 0;
char* utf16_unaccented = 0;
- int utf16_unaccented_length = 0;
+ size_t utf16_unaccented_length = 0;
if(convert(charset, utf16be(), in, in_length, &utf16, &utf16_length) < 0) {
return -1;

View file

@ -0,0 +1,87 @@
#!/bin/sh
# Slackware build script for perl-text-unaccent
# Written by B. Watson (yalhcru@gmail.com)
# Licensed under the WTFPL. See http://www.wtfpl.net/txt/copying/ for details.
#
# Overview: unpacks the Text-Unaccent source tarball found in the directory
# the script is run from, applies oe_lig_and_size_t.diff, builds, tests and
# installs the module into a staging directory, then runs makepkg on it.
#
# Package identity. VERSION, BUILD and TAG fall back to the defaults below
# when they are unset or empty in the environment.
PRGNAM=perl-text-unaccent
VERSION=${VERSION:-1.08}
BUILD=${BUILD:-1}
TAG=${TAG:-_SBo}
# When ARCH is not supplied, derive it from `uname -m`: any i?86 becomes
# i486, any arm* becomes arm, and every other value is used verbatim.
if [ -z "$ARCH" ]; then
case "$( uname -m )" in
i?86) ARCH=i486 ;;
arm*) ARCH=arm ;;
*) ARCH=$( uname -m ) ;;
esac
fi
# Working locations: CWD is the directory the script is run from (it must
# hold the tarball, the patch and slack-desc, all read below), TMP is the
# build area, PKG the staging root, OUTPUT where the package is written.
# TMP and OUTPUT can be overridden from the environment.
CWD=$(pwd)
TMP=${TMP:-/tmp/SBo}
PKG=$TMP/package-$PRGNAM
OUTPUT=${OUTPUT:-/tmp}
# Pick compiler flags and a lib directory suffix for the target ARCH.
# NOTE(review): neither SLKCFLAGS nor LIBDIRSUFFIX is referenced again in
# this script -- confirm whether they are intentionally unused.
if [ "$ARCH" = "i486" ]; then
SLKCFLAGS="-O2 -march=i486 -mtune=i686"
LIBDIRSUFFIX=""
elif [ "$ARCH" = "i686" ]; then
SLKCFLAGS="-O2 -march=i686 -mtune=i686"
LIBDIRSUFFIX=""
elif [ "$ARCH" = "x86_64" ]; then
SLKCFLAGS="-O2 -fPIC"
LIBDIRSUFFIX="64"
else
SLKCFLAGS="-O2"
LIBDIRSUFFIX=""
fi
# From here on, stop at the first command that exits non-zero.
set -e
# The tarball and its top-level directory are named after TARNAME, not PRGNAM.
TARNAME=Text-Unaccent
# Start from an empty staging directory and a freshly extracted source tree.
rm -rf $PKG
mkdir -p $TMP $PKG $OUTPUT
cd $TMP
rm -rf $TARNAME-$VERSION
tar xvf $CWD/$TARNAME-$VERSION.tar.gz
cd $TARNAME-$VERSION
# Give the whole source tree to root and normalize permissions: entries whose
# mode matches the first list become 755, those matching the second list
# become 644.
chown -R root:root .
find -L . \
\( -perm 777 -o -perm 775 -o -perm 750 -o -perm 711 -o -perm 555 \
-o -perm 511 \) -exec chmod 755 {} \; -o \
\( -perm 666 -o -perm 664 -o -perm 640 -o -perm 600 -o -perm 444 \
-o -perm 440 -o -perm 400 \) -exec chmod 644 {} \;
# combined patch made by diffing against the unac.c in system/unac.
# It is applied with -p1 from inside the extracted source directory.
patch -p1 < $CWD/oe_lig_and_size_t.diff
# Configure for installation under /usr using the vendor directories, with
# section 3 man pages placed in /usr/man/man3.
perl Makefile.PL \
PREFIX=/usr \
INSTALLDIRS=vendor \
INSTALLVENDORMAN3DIR=/usr/man/man3
# Build, run the test suite (a failure aborts the script because of set -e),
# then install into the staging directory rather than the live system.
make
make test
make install DESTDIR=$PKG
# Compress the installed man pages.
gzip $PKG/usr/man/man?/*.?
# Strip ELF executables and shared objects found in the staging directory;
# strip's stderr is discarded and a failing pipeline is deliberately ignored.
find $PKG -print0 | xargs -0 file | grep -e "executable" -e "shared object" | grep ELF \
| cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true
# Delete perllocal.pod, .packlist and *.bs files from the staging directory.
find $PKG -name perllocal.pod \
-o -name ".packlist" \
-o -name "*.bs" \
| xargs rm -f
# Install the upstream documentation together with a copy of this script.
mkdir -p $PKG/usr/doc/$PRGNAM-$VERSION
cp -a COPYING ChangeLog README $PKG/usr/doc/$PRGNAM-$VERSION
cat $CWD/$PRGNAM.SlackBuild > $PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild
# Add the package description, then build the package from the staging
# directory into OUTPUT; the extension is tgz unless PKGTYPE is set.
mkdir -p $PKG/install
cat $CWD/slack-desc > $PKG/install/slack-desc
cd $PKG
/sbin/makepkg -l y -c n $OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.${PKGTYPE:-tgz}

View file

@ -0,0 +1,10 @@
PRGNAM="perl-text-unaccent"
VERSION="1.08"
HOMEPAGE="https://metacpan.org/pod/Text::Unaccent"
DOWNLOAD="https://cpan.metacpan.org/authors/id/L/LD/LDACHARY/Text-Unaccent-1.08.tar.gz"
MD5SUM="9ac9b28cbb66f4829a50d563ace79cb5"
DOWNLOAD_x86_64=""
MD5SUM_x86_64=""
REQUIRES=""
MAINTAINER="B. Watson"
EMAIL="yalhcru@gmail.com"

View file

@ -0,0 +1,19 @@
# HOW TO EDIT THIS FILE:
# The "handy ruler" below makes it easier to edit a package description.
# Line up the first '|' above the ':' following the base package name, and
# the '|' on the right side marks the last column you can put a character in.
# You must make exactly 11 lines for the formatting to be correct. It's also
# customary to leave one space after the ':' except on otherwise blank lines.
|-----handy-ruler------------------------------------------------------|
perl-text-unaccent: perl-text-unaccent (perl module to remove accents from a string)
perl-text-unaccent:
perl-text-unaccent: Text::Unaccent is a module that removes accents from a string.
perl-text-unaccent: unac_string converts the input string from the specified charset to
perl-text-unaccent: UTF-16 and calls unac_string_utf16 to return the unaccented
perl-text-unaccent: equivalent. The conversion from and to UTF-16 is done with iconv(1).
perl-text-unaccent:
perl-text-unaccent:
perl-text-unaccent:
perl-text-unaccent:
perl-text-unaccent: