-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcolorgram_tool.sh
More file actions
executable file
·143 lines (112 loc) · 4.42 KB
/
colorgram_tool.sh
File metadata and controls
executable file
·143 lines (112 loc) · 4.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/bin/bash
#
# colorgram_tool.sh - count k-mers with KMC and build a colorgram from them.
#
# Usage:
#   ./colorgram_tool.sh -k=32 file_list.txt kmc-files-dir out-filename
#   ./colorgram_tool.sh -k=32 input-file    kmc-files-dir out-filename -m
#
# Arguments:
#   $1  k-mer size, written as -k=<positive integer>
#   $2  list mode (4 arguments): text file naming one input file per line;
#       single-input mode (5+ arguments): one input file counted directly
#   $3  working directory for the KMC databases (created if missing)
#   $4  output file name handed to build/colorgram-build
#   $5  optional; ANY fifth argument (conventionally -m) selects single-input
#       mode, and -m is then passed on to build/colorgram-build
#
# The current directory must contain cut_sides.py, 3rd_party/KMC/bin and the
# CMake project, because all of them are addressed by relative path.
#
# Exit status: 0 on success, non-zero as soon as a required step fails.

KMC=./3rd_party/KMC/bin/kmc
KMC_TOOLS=./3rd_party/KMC/bin/kmc_tools
KMC_TMP_DIR=kmc_temp

# Print a diagnostic on stderr.
err() {
  printf '%s\n' "$*" >&2
}

# Print the command-line synopsis on stderr.
usage() {
  err "usage: ${0##*/} -k=<k> <file-list> <kmc-files-dir> <out-filename> [-m]"
}

#######################################
# Parse the k-mer size argument.
# Arguments: $1 - "-k=<digits>" (bare "<digits>" is also accepted)
# Outputs:   the size in decimal on stdout
# Returns:   1 when the value is not a positive integer
#######################################
kmer_size() {
  local digits=${1#-k=}
  # Validate before doing arithmetic: $(( )) would evaluate arbitrary
  # expressions and silently turn garbage into 0.
  [[ $digits =~ ^[0-9]+$ ]] || return 1
  # 10# keeps a leading zero from being read as octal.
  local k=$((10#$digits))
  (( k > 0 )) || return 1
  printf '%s\n' "$k"
}

#######################################
# Count the lines of a file that contain at least one non-blank character.
# A final line without a trailing newline is counted as well.
# Arguments: $1 - file to inspect
# Outputs:   the count on stdout
# Returns:   1 when the file cannot be read
#######################################
count_nonblank_lines() {
  local count
  count=$(grep -c -e '[^[:space:]]' -- "$1")
  # grep exits 1 (and still prints 0) when nothing matched; >1 is an error.
  (( $? <= 1 )) || return 1
  printf '%s\n' "$count"
}

#######################################
# Run KMC on one input file; the database is written next to the input
# (<input>.kmc_pre / <input>.kmc_suf).
# Arguments: $1 - k-mer length, $2 - input file
#######################################
count_kmers() {
  "$KMC" -ci0 -fm "-k$1" -b -m1 -cs1 "$2" "$2" "$KMC_TMP_DIR"
}

#######################################
# Rename a KMC database (its .kmc_pre and .kmc_suf files).
# Arguments: $1 - source prefix, $2 - destination prefix
#######################################
move_database() {
  local src=$1 dst=$2
  # Nothing to do when the database already carries the final name.
  [[ $src == "$dst" ]] && return 0
  mv -- "$src.kmc_pre" "$dst.kmc_pre" && mv -- "$src.kmc_suf" "$dst.kmc_suf"
}

#######################################
# Produce <dir>/<name>.kmc_{pre,suf} as the union of the given databases.
# One input is simply renamed; several are folded together with kmc_tools,
# using <dir>/<name>_tmp as scratch space for the running result.
# Arguments: $1 - directory, $2 - result name, $3.. - input database prefixes
# Returns:   1 when there are no inputs or a step fails
#######################################
union_databases() {
  local dir=$1 name=$2
  shift 2
  local -a inputs=("$@")
  local count=${#inputs[@]}
  local target="$dir/$name"
  local scratch_dir="$dir/${name}_tmp"
  local i

  if (( count == 0 )); then
    err "no input databases found for '$name' in $dir"
    return 1
  fi
  if (( count == 1 )); then
    move_database "${inputs[0]}" "$target"
    return
  fi

  mkdir -p -- "$scratch_dir" || return 1
  "$KMC_TOOLS" simple "${inputs[0]}" "${inputs[1]}" union \
    "$scratch_dir/$name" || return 1
  # Inputs 0 and 1 are already merged, so continue with index 2.
  for (( i = 2; i < count; i++ )); do
    "$KMC_TOOLS" simple "${inputs[i]}" "$scratch_dir/$name" union \
      "$target" || return 1
    # The new result becomes the running result for the next round.
    mv -- "$target.kmc_pre" "$target.kmc_suf" "$scratch_dir/" || return 1
  done
  mv -- "$scratch_dir/$name.kmc_pre" "$scratch_dir/$name.kmc_suf" \
    "$dir/" || return 1
  # Best effort: do not leave the scratch directory behind in <dir>.
  rmdir -- "$scratch_dir" 2>/dev/null || true
}

#######################################
# Count (k-1)-mers for every regular file in the working directory and
# combine the databases of the "*begin" / "*end" files into begin / end.
# Arguments: $1 - k-1, $2 - working directory
#######################################
build_sides() {
  local k_minus_1=$1 dir=$2
  local path
  local -a begin_files=() end_files=()

  for path in "$dir"/*; do
    [[ -f $path ]] || continue
    # Not fatal here: a missing begin/end database is detected below.
    count_kmers "$k_minus_1" "$path" || err "warning: kmc failed on $path"
  done

  for path in "$dir"/*begin; do
    [[ -f $path ]] && begin_files+=("$path")
  done
  for path in "$dir"/*end; do
    [[ -f $path ]] && end_files+=("$path")
  done

  union_databases "$dir" begin "${begin_files[@]}" || return 1
  union_databases "$dir" end "${end_files[@]}" || return 1
}

#######################################
# Count k-mers of the input(s) and store them as <dir>/kmers.
# Arguments: $1 - k, $2 - list file or single input, $3 - working directory,
#            $4 - 1 for single-input mode, 0 for list mode
#######################################
build_edges() {
  local k=$1 list=$2 dir=$3 single_input=$4
  local line
  local -a reads=()

  if (( single_input )); then
    count_kmers "$k" "$list" || return 1
    move_database "$list" "$dir/kmers"
    return
  fi

  # One path per line; surrounding blanks are trimmed, blank lines skipped,
  # and a last line without a newline is still picked up.
  while read -r line || [[ -n $line ]]; do
    [[ -n $line ]] && reads+=("$line")
  done < "$list"

  for line in "${reads[@]}"; do
    count_kmers "$k" "$line" || return 1
  done
  # With several reads the union of all edge sets goes into one database.
  union_databases "$dir" kmers "${reads[@]}"
}

#######################################
# Configure and compile colorgram-build in ./build.
# Arguments: $1 - value for the MAXCOLORS CMake definition
#######################################
build_colorgram() {
  mkdir -p build || return 1
  # Subshell keeps the caller's working directory untouched.
  ( cd build && cmake ../ "-DMAXCOLORS=$1" && make )
}

main() {
  if (( $# < 4 )); then
    err "Too few arguments were given..."
    usage
    return 1
  fi

  local k
  if ! k=$(kmer_size "$1"); then
    err "invalid k-mer size '$1' (expected -k=<positive integer>)"
    usage
    return 1
  fi

  local list=$2 dir=$3 out=$4
  local single_input=0
  (( $# > 4 )) && single_input=1

  if [[ ! -r $list ]]; then
    err "cannot read input file: $list"
    return 1
  fi
  if [[ -z $dir ]]; then
    # An empty directory would make the cleanup globs below resolve to /.
    err "the KMC working directory must not be empty"
    return 1
  fi

  mkdir -p -- "$dir" || return 1
  # Drop databases of a previous run; -f keeps a first run quiet.
  rm -f -- "$dir"/*kmc_pre "$dir"/*kmc_suf

  # Cut the beginnings and ends of the lists.
  python cut_sides.py "$@" || return 1

  # Temporary directory for KMC.
  mkdir -p -- "$KMC_TMP_DIR" || return 1

  ############# CALCULATE THE SIDES #############
  build_sides "$(( k - 1 ))" "$dir" || return 1

  ############# CALCULATE THE EDGES #############
  build_edges "$k" "$list" "$dir" "$single_input" || return 1

  # The number of colors comes from the non-blank lines of the input.
  local max_colors
  max_colors=$(count_nonblank_lines "$list") || return 1
  # NOTE(review): single-input mode halves the count, presumably because
  # every color takes two lines (header + sequence) - confirm.
  (( single_input )) && max_colors=$(( max_colors / 2 ))
  echo "compiling with $max_colors colors"

  build_colorgram "$max_colors" || return 1

  local -a cmd=(build/colorgram-build "$k" 0 "$list"
    "$dir/kmers" "$dir/begin" "$dir/end" "$out")
  (( single_input )) && cmd+=(-m)
  echo "${cmd[*]}"
  time "${cmd[@]}"
}

main "$@"