academic/vCAPS_coevolution: Added (Coevolution Analysis)

Signed-off-by: Dave Woodfall <dave@slackbuilds.org>
This commit is contained in:
Petar Petrov 2022-09-09 03:10:09 +01:00 committed by Dave Woodfall
parent 1f0ff62f35
commit 0a28023757
7 changed files with 357 additions and 0 deletions

View file

@ -0,0 +1,114 @@
diff -pruN orig/caps.cpp new/caps.cpp
--- orig/caps.cpp 2012-12-15 17:13:23.000000000 +0200
+++ new/caps.cpp 2020-09-09 23:07:46.080566000 +0300
@@ -14,7 +14,7 @@
#include <gsl/gsl_statistics.h>
#include<sys/time.h>
#include<iomanip>
-
+#include <bits/stdc++.h>
@@ -69,6 +69,8 @@
const gsl_rng_type * T;
gsl_rng *r;
+vector<double> totaltempnew;
+double alphathresh = 0;
int main(int argc, char *argv[]){
@@ -543,16 +545,27 @@ int main(int argc, char *argv[]){
print_splash(output);
+ OUTPUT << "\n\File1: " << files[i] << endl;
vec1.print_to_fasta(output.c_str());
+ OUTPUT << "\n\nFile2: " << files[j] << endl;
vec2.print_to_fasta(output.c_str());
int length1 = vec1.sequences[0].length();
int length2 = vec2.sequences[0].length();
+ OUTPUT << "\n\nLength1: " << length1 << endl;
+ OUTPUT << "Length2: " << length2 << endl;
if(tree_in ==0){
tree1 = create_input_tree(vec1.names, vec1.sequences);
tree2 = create_input_tree(vec2.names, vec2.sequences);
+
+ // Output the CAPS generated trees to the .out file of each pair
+ string temptre1 = TreeTemplateTools::treeToParenthesis(*tree1, true);
+ string temptre2 = TreeTemplateTools::treeToParenthesis(*tree2, true);
+ OUTPUT << "\n" << endl;
+ OUTPUT << "CAPS generated tree 1: " << temptre1 << endl;
+ OUTPUT << "CAPS generated tree 2: " << temptre2 << endl;
}/*else if(tree_in ==1 && variable==1){
std::auto_ptr<DistanceMatrix> DS;
@@ -666,6 +679,7 @@ int main(int argc, char *argv[]){
int value = floor(((totaltemp.size())*(1-(threshval))))+1;
threshold = totaltemp[value];
+ totaltempnew = totaltemp;
/*=======================================================*/
@@ -870,6 +884,30 @@ int Chi_squared (int num_pairs, int num_
} /* ----- end of function Chi_squared ----- */
+/*
+ * === FUNCTION ======================================================================
+ * Name: find_alpha
+ * Description: Find the index of an element in a vector totaltemp
+ * Help from: https://www.geeksforgeeks.org/how-to-find-index-of-a-given-element-in-a-vector-in-cpp/
+ * https://stackoverflow.com/questions/8647635/elegant-way-to-find-closest-value-in-a-vector-from-above
+ * Author: Petar Petrov, University of Turku (Finland); pebope@utu.fi
+ * =====================================================================================
+ */
+double getIndex(std::vector<double> const& v, double K)
+{
+ auto const it = std::lower_bound(v.begin(), v.end(), fabs(K));
+ //auto it = std::upper_bound(v.begin(), v.end(), fabs(K));
+
+ if (it != v.end()) {
+ int index = distance(v.begin(), it);
+ alphathresh = (((int)1+(double)v.size()-(int)index)/(double)v.size());
+ return alphathresh;
+ //cerr << index << "\t" << alphathresh << endl;
+ }
+ else {
+ cerr << "ELEMENT NOT FOUND!" << endl;
+ }
+}
@@ -890,9 +928,9 @@ int print_inter(vector<double>& Correl1,
output << endl << endl;
output << "Coevolving Pairs of amino acid sites\n";
- output << "=============================================================================\n";
- output << "Col1(real)\tCol2(real)\tDmean1\t\tDmean2\t\tCorrelation\tBootstrap value\n\n";
- output << "=============================================================================\n";
+ output << "================================================================================================================================\n";
+ output << "Col1(real)\tCol2(real)\tDmean1\t\tDmean2\t\tCorrelation\tBootstrap value\tP-value1\tP-value2\tMean P-value\tCorrelation1\tCorrelation2\n\n";
+ output << "================================================================================================================================\n";
//double mean = average_vec<double>(Correl);
//double SD = SD_vf(Correl, mean);
@@ -951,9 +989,11 @@ int print_inter(vector<double>& Correl1,
// }
+ double Alpha1 = getIndex(totaltempnew, Correl1[cor]);
+ double Alpha2 = getIndex(totaltempnew, Correl2[cor]);
//if(bootval>=bootcut && re1<=8 && re2<=8 ){
if(bootval>=bootcut){
- output << i+1 << "(" << i-gaps1+1 << ")\t\t" << j+1 << "(" << (j+1)-gaps2 << ")\t\t" << averDi << "\t\t" << averDj << "\t\t" << (Correl1[cor]+Correl2[cor])/2 << "\t" << bootval << endl;
+ output << i+1 << "(" << i-gaps1+1 << ")\t\t" << j+1 << "(" << (j+1)-gaps2 << ")\t\t" << averDi << "\t\t" << averDj << "\t" << (Correl1[cor]+Correl2[cor])/2 << "\t" << bootval << "\t" << Alpha1 << "\t" << Alpha2 << "\t" << (Alpha1+Alpha2)/2 << "\t" << Correl1[cor] << "\t" << Correl2[cor] << endl;
signif.push_back(((Correl1[cor]+Correl2[cor])/2));
++pairs;
vector<int> tem;

View file

@ -0,0 +1,38 @@
diff -pruN old/caps.cpp new/caps.cpp
--- old/caps.cpp 2022-01-20 09:30:24.409886691 +0200
+++ new/caps.cpp 2022-01-20 09:39:10.669845265 +0200
@@ -1,4 +1,5 @@
#include<iostream>
+#include <algorithm>
#include<fstream>
#include"BCFasta.h"
#include"file_manip.h"
@@ -15,7 +16,7 @@
#include<sys/time.h>
#include<iomanip>
#include <bits/stdc++.h>
-
+#include <vector>
#include <Seq/SequenceApplicationTools.h>
@@ -69,6 +70,10 @@
const gsl_rng_type * T;
gsl_rng *r;
+// make sure filenames are sorted!
+// https://stackoverflow.com/a/34757557
+bool compareFunction (std::string a, std::string b) {return a<b;}
+
vector<double> totaltempnew;
double alphathresh = 0;
int main(int argc, char *argv[]){
@@ -189,6 +194,8 @@ int main(int argc, char *argv[]){
vector<string> files;
files = Folder_to_vector(mystring.c_str());
+ // make sure filenames are sorted!
+ std::sort(files.begin(),files.end(),compareFunction);
Fasta_vector file;
file.ref_num=0;

View file

@ -0,0 +1,33 @@
vCAPS: (verbose) Coevolution Analysis using Protein Sequences
CAPS is aimed at measuring the coevolution between amino acid sites
belonging to the same protein (intra-molecular coevolution) or to two
functionally or physically interacting proteins (inter-molecular
coevolution). The Software implements an improved method to detect
intra-molecular coevolution as published in Genetics (Fares and Travers,
2006) and also inter-protein coevolution analysis. The improved scoring
of amino acid sites is obtained by maximum likelihood ancestral state
reconstruction along with simulations to assess significance.
This is a modified version of "CAPS_coevolution", used in the AutoCoEv
pipeline:
https://github.com/mattilalab/autocoev
It applies two _unofficial_ patches:
- 01_caps_verbose: makes the program output its generated trees, as well
as the p-value for each correlated amino acid pair
- 02_caps_sort_input: introduce a function to sort input file names
The produced executable is called "vCAPS" and can be installed along
"caps" from CAPS_coevolution. Building CAPS from source requires the
Bio++ 1.9 suite libraries, but make sure the current versions of the
bppsuite (and its dependencies) are NOT installed at build time.
Citing CAPS:
CAPS: coevolution analysis using protein sequences. Fares MA, McNally D.
Bioinformatics. 2006 Nov 15;22(22):2821-2. PMID: 17005535
The mathematical model has been described separately:
A novel method for detecting intramolecular coevolution: adding a
further dimension to selective constraints analyses. Fares MA, Travers
SA. Genetics. 2006 May;173(1):9-23. PMID: 16547113

View file

@ -0,0 +1,14 @@
If you use CAPS in your research, please include the following citations:
CAPS: coevolution analysis using protein sequences.
Fares MA, McNally D.
Bioinformatics. 2006 Nov 15;22(22):2821-2.
PMID: 17005535
https://www.ncbi.nlm.nih.gov/pubmed/17005535
The mathematical model has been described separately:
A novel method for detecting intramolecular coevolution: adding a further dimension to selective constraints analyses.
Fares MA, Travers SA.
Genetics. 2006 May;173(1):9-23.
PMID: 16547113
https://www.ncbi.nlm.nih.gov/pubmed/16547113

View file

@ -0,0 +1,19 @@
# HOW TO EDIT THIS FILE:
# The "handy ruler" below makes it easier to edit a package description.
# Line up the first '|' above the ':' following the base package name, and
# the '|' on the right side marks the last column you can put a character in.
# You must make exactly 11 lines for the formatting to be correct. It's also
# customary to leave one space after the ':' except on otherwise blank lines.
|-----handy-ruler------------------------------------------------------|
vCAPS_coevolution: vCAPS_coevolution (Coevolution Analysis using Protein Sequences)
vCAPS_coevolution:
vCAPS_coevolution: CAPS is aimed at measuring the coevolution between amino acid
vCAPS_coevolution: sites belonging to the same protein (intra-molecular coevolution)
vCAPS_coevolution: or to two functionally or physically interacting proteins (inter-
vCAPS_coevolution: molecular coevolution). In addition, a test which assesses
vCAPS_coevolution: whether two proteins are interacting is implemented.
vCAPS_coevolution:
vCAPS_coevolution: Home: http://bioinf.gen.tcd.ie/~faresm/software/software.html
vCAPS_coevolution:
vCAPS_coevolution:

View file

@ -0,0 +1,129 @@
#!/bin/bash
# Slackware build script for vCAPS_coevolution
# Copyright 2020-2022 Petar Petrov slackalaxy@gmail.com
# All rights reserved.
#
# Redistribution and use of this script, with or without modification, is
# permitted provided that the following conditions are met:
#
# 1. Redistributions of this script must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cd $(dirname $0) ; CWD=$(pwd)
PRGNAM=vCAPS_coevolution
VERSION=${VERSION:-2.0_2UN}
BUILD=${BUILD:-1}
TAG=${TAG:-_SBo}
PKGTYPE=${PKGTYPE:-tgz}
SRCNAM=caps
SRCVER=2.0
BINNAM=vCAPS
if [ -z "$ARCH" ]; then
case "$( uname -m )" in
i?86) ARCH=i586 ;;
arm*) ARCH=arm ;;
*) ARCH=$( uname -m ) ;;
esac
fi
if [ ! -z "${PRINT_PACKAGE_NAME}" ]; then
echo "$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.$PKGTYPE"
exit 0
fi
TMP=${TMP:-/tmp/SBo}
PKG=$TMP/package-$PRGNAM
OUTPUT=${OUTPUT:-/tmp}
if [ "$ARCH" = "i586" ]; then
SLKCFLAGS="-O2 -march=i586 -mtune=i686"
LIBDIRSUFFIX=""
elif [ "$ARCH" = "i686" ]; then
SLKCFLAGS="-O2 -march=i686 -mtune=i686"
LIBDIRSUFFIX=""
elif [ "$ARCH" = "x86_64" ]; then
SLKCFLAGS="-O2 -fPIC"
LIBDIRSUFFIX="64"
else
SLKCFLAGS="-O2"
LIBDIRSUFFIX=""
fi
LIBDIRPATH="-Wl,-rpath,/usr/lib${LIBDIRSUFFIX}/Bpp1.9"
set -e
rm -rf $PKG
mkdir -p $TMP $PKG $OUTPUT
cd $TMP
rm -rf ${SRCNAM}${SRCVER}_src
unzip $CWD/${SRCNAM}2_src.zip
cd ${SRCNAM}${SRCVER}_src
chown -R root:root .
find -L . \
\( -perm 777 -o -perm 775 -o -perm 750 -o -perm 711 -o -perm 555 \
-o -perm 511 \) -exec chmod 755 {} \; -o \
\( -perm 666 -o -perm 664 -o -perm 640 -o -perm 600 -o -perm 444 \
-o -perm 440 -o -perm 400 \) -exec chmod 644 {} \;
# This is needed for gcc in Slackware 14.2
sed -i "s:CC=g++ -g:CC=g++-5 -g -std=c++11:" Makefile
# Use our CFLAGS and the custom (legacy) lib path
sed -i "s:CFLAGS=:CFLAGS=$SLKCFLAGS $LIBDIRPATH:" Makefile
# Find the legacy bpp libraries
sed -i "s:-lbpp-phyl:-L/usr/lib${LIBDIRSUFFIX}/Bpp1.9 -lbpp-phyl:g" Makefile
sed -i "s:-lbpp-numcalc:-L/usr/lib64${LIBDIRSUFFIX}/Bpp1.9 -lbpp-numcalc:g" Makefile
sed -i "s:-lbpp-utils:-L/usr/lib64${LIBDIRSUFFIX}/Bpp1.9 -lbpp-utils:g" Makefile
sed -i "s:-lbpp-seq:-L/usr/lib64${LIBDIRSUFFIX}/Bpp1.9 -lbpp-seq:g" Makefile
# Rename the produced executable
sed -i "s:-o caps:-o $BINNAM:" Makefile
# Use our patches
patch -p1 -i $CWD/01_caps_verbose.patch
patch -p1 -i $CWD/02_caps_sort_input.patch
# we already specified g++-5 above, so no need for this
#source /etc/profile.d/gcc5.sh
make all
# Install the binary produced from our patched source, as "vCAPS"
install -D -m755 $BINNAM $PKG/usr/bin/$BINNAM
find $PKG -print0 | xargs -0 file | grep -e "executable" -e "shared object" | grep ELF \
| cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true
mkdir -p $PKG/usr/share/$PRGNAM
cp -a sample structures trees TLR1.fa.out $PKG/usr/share/$PRGNAM
mkdir -p $PKG/usr/doc/$PRGNAM-$VERSION
cp -a \
caps_manual.pdf \
$PKG/usr/doc/$PRGNAM-$VERSION
cat $CWD/$PRGNAM.SlackBuild > $PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild
cat $CWD/References > $PKG/usr/doc/$PRGNAM-$VERSION/References
mkdir -p $PKG/install
cat $CWD/slack-desc > $PKG/install/slack-desc
cd $PKG
/sbin/makepkg -l y -c n $OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.$PKGTYPE

View file

@ -0,0 +1,10 @@
PRGNAM="vCAPS_coevolution"
VERSION="2.0_2UN"
HOMEPAGE="http://bioinf.gen.tcd.ie/caps/home.html"
DOWNLOAD="https://raw.githubusercontent.com/slackalaxy/sources/main/caps2_src.zip"
MD5SUM="0914007c32ed22a9cb8a47b55cd18a39"
DOWNLOAD_x86_64=""
MD5SUM_x86_64=""
REQUIRES="bpp1.9-phyl"
MAINTAINER="Petar Petrov"
EMAIL="slackalaxy@gmail.com"