Files
meta-simulator/scripts/simulate.sh
Angeline Aguinaldo db57e4495d initial commit
2020-11-12 20:18:05 -05:00

149 lines
4.0 KiB
Bash

# **********************************************************************
# Copyright (C) 2020 Johns Hopkins University Applied Physics Laboratory
#
# All Rights Reserved.
# For any other permission, please contact the Legal Office at JHU/APL.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# **********************************************************************
usage()
{
cat << EOF
DESCRIPTION: This script will run deep simulator using either CPU or GPU to make a set of reads for a single fasta file. Future Updates below
USAGE:
bash simulate.sh -i absolute path to fasta file
-c number of cpu nodes you want
-n Number of reads for simulation
-g [CPU/GPU/ALBACORE]
-o <Output Directory that should be the name of the file you want/organism you want to simulate
OPTIONS:
-h help show this message
-i fna reference fasta input file
-n read# number of reads for given fasta file
-o output output directory
-g GPU/CPU Choose either CPU or GPU to run simulation on
-j exit at fast5 [1|2] where 1 is default and it doesnt exit and 2 exits after fast5 making
NOTES:
Update this script is in the works to map abundance profile to a directory of fasta files
EOF
}
source activate simulator
# parsing arguments from command line
cpu_count=1
deep_sim_loc="$PWD/src/DeepSimulator"
guppy_type="CPU"
read_count=1
j=1
while getopts "hi:d:o:n:B:c:r:g:j:" OPTION
do
case $OPTION in
h) usage; exit 1 ;;
i) fasta_input_file=$OPTARG ;;
o) output_dir=$OPTARG ;;
c) cpu_count=$OPTARG ;;
g) guppy_type=$OPTARG ;;
d) deep_sim_loc=$OPTARG;;
n) read_count=$OPTARG;;
j) j=$OPTARG;;
?) usage; exit ;;
esac
done
if [[ (! $guppy_type == "GPU" ) && (! $guppy_type == "CPU") && (! $guppy_type == "ALBACORE") ]]; then
usage
echo "Invalid Guppy basecaller selected [GPU|CPU|ALBACORE]. Exiting."
exit 1
fi
echo $guppy_type
if [[ $guppy_type == "GPU" ]]; then
guppy_type=1
elif [[ $guppy_type == "CPU" ]]; then
guppy_type=2
else
guppy_type=3
fi
#define location of fasta input and output location of simulated reads as user
#since torque runs the script from a different location, specify absolute pathing
current_loc=$( pwd )
envbin=$(which python)
base="$(dirname $envbin)"
echo $deep_sim_loc
echo $PWD
base=$(basename ${fasta_input_file} .fasta)
mkdir $output_dir"/$base"
conda deactivate
if [[ $j -eq 1 ]]; then
echo $output_dir"/$base"
cat <<EOF
${deep_sim_loc}/deep_simulator.sh \
-i ${fasta_input_file} \
-n ${read_count} \
-c $cpu_count \
-o $output_dir"/$base" \
-B $guppy_type \
-H $deep_sim_loc
EOF
bash ${deep_sim_loc}/deep_simulator.sh \
-i ${fasta_input_file} \
-n ${read_count} \
-c $cpu_count \
-o $output_dir"/$base" \
-B $guppy_type \
-H $deep_sim_loc
# exit 1
elif [[ $j -eq 2 ]]; then
cat <<EOF
${deep_sim_loc}/deep_simulator_fast5only.sh \
-i ${fasta_input_file} \
-n ${read_count} \
-c $cpu_count \
-o $output_dir"/$base" \
-B $guppy_type \
-H $deep_sim_loc
EOF
bash ${deep_sim_loc}/deep_simulator_fast5only.sh \
-i ${fasta_input_file} \
-n ${read_count} \
-c $cpu_count \
-o $output_dir"/$base" \
-B $guppy_type \
-H $deep_sim_loc
# exit 1
else
echo "Exit: -j isn't properly specified as 1 (dont exit after fast5) or 2 (exit after fast5)"
exit 1
fi
#After the fun is done for the fullDeepSim run
# bash scripts/remapOxfordFastq.sh \
# -i data/fullDeepSim/metasim-strawman_envassay.tsv/r9/ \
# -o pass_mapped.fastq && find data/fullDeepSim/metasim-strawman_envassay.tsv/r9/ \
# -maxdepth 3 \
# -name "pass_mapped.fastq" \
# -exec cat {} + > data/fullDeepSim/pass_mapped_merged.fastq