# Mirror of https://github.com/JHUAPL/meta-simulator.git
# Synced 2026-04-26 03:00:02 -04:00
# Original file: 149 lines, 4.0 KiB, Bash
# **********************************************************************
# Copyright (C) 2020 Johns Hopkins University Applied Physics Laboratory
#
# All Rights Reserved.
# For any other permission, please contact the Legal Office at JHU/APL.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# **********************************************************************

#######################################
# Print the command-line help text for this script.
# Arguments: none
# Outputs:   writes the help text to stdout
# Returns:   status of cat (0 on success)
#######################################
usage() {
  # Quoted delimiter: the help text is literal, nothing in it is expanded.
  cat <<'EOF'

DESCRIPTION: This script will run deep simulator using either CPU or GPU to make a set of reads for a single fasta file. Future Updates below

USAGE:
bash simulate.sh -i <absolute path to fasta file>
                 -c <number of cpu nodes you want>
                 -n <number of reads for simulation>
                 -g [CPU|GPU|ALBACORE]
                 -o <output directory; should be named after the file/organism you want to simulate>
OPTIONS:
   -h      help        show this message
   -i      fna         reference fasta input file
   -n      read#       number of reads for given fasta file (default: 1)
   -c      cpu#        number of cpu nodes to use (default: 1)
   -o      output      output directory
   -g      GPU/CPU     choose CPU, GPU or ALBACORE to run simulation on (default: CPU)
   -d      deepsim     location of the DeepSimulator directory (default: src/DeepSimulator under the current directory)
   -j      exit at fast5 [1|2] where 1 is default and it doesnt exit and 2 exits after fast5 making
NOTES:
   Update this script is in the works to map abundance profile to a directory of fasta files

EOF
}

# Activate the "simulator" conda environment. A failure is reported on stderr
# but is not fatal, matching the previous best-effort behaviour.
source activate simulator \
  || echo "Warning: could not activate the 'simulator' conda environment" >&2

# Defaults for the command-line options; overridden by parse_args.
cpu_count=1
deep_sim_loc="$PWD/src/DeepSimulator"
guppy_type="CPU"
read_count=1
j=1

#######################################
# Parse the command-line options into the global option variables.
# Globals:   fasta_input_file, output_dir, cpu_count, guppy_type,
#            deep_sim_loc, read_count, j (written)
# Arguments: the script's command-line arguments
# Outputs:   help text (via usage) for -h or an invalid option
# Returns:   0 on success; exits 1 for -h or an invalid option
#######################################
parse_args() {
  # OPTIND is local so the function can be called more than once.
  local OPTIND=1 OPTION
  while getopts "hi:d:o:n:c:g:j:" OPTION; do
    case "$OPTION" in
      h) usage; exit 1 ;;   # exit status 1 kept for existing callers
      i) fasta_input_file=$OPTARG ;;
      o) output_dir=$OPTARG ;;
      c) cpu_count=$OPTARG ;;
      g) guppy_type=$OPTARG ;;
      d) deep_sim_loc=$OPTARG ;;
      n) read_count=$OPTARG ;;
      j) j=$OPTARG ;;
      # Unknown option or missing argument: getopts has already printed a
      # diagnostic; show the help and fail with a non-zero status.
      *) usage; exit 1 ;;
    esac
  done
}

#######################################
# Translate a basecaller name into the numeric code passed on via -B.
# Arguments: $1 - basecaller name (GPU, CPU or ALBACORE; case-sensitive)
# Outputs:   writes 1 (GPU), 2 (CPU) or 3 (ALBACORE) to stdout
# Returns:   0 on success, 1 if the name is not recognised
#######################################
guppy_type_code() {
  case "$1" in
    GPU) echo 1 ;;
    CPU) echo 2 ;;
    ALBACORE) echo 3 ;;
    *) return 1 ;;
  esac
}

parse_args "$@"

if ! guppy_code=$(guppy_type_code "$guppy_type"); then
  usage
  echo "Invalid Guppy basecaller selected [GPU|CPU|ALBACORE]. Exiting."
  exit 1
fi
echo "$guppy_type"
# From here on guppy_type holds the numeric code rather than the name.
guppy_type=$guppy_code

# Define the location of the fasta input and the output location of the
# simulated reads. Since torque runs the script from a different location,
# absolute paths should be given for -i and -o.

# Both paths are mandatory: without -o the output directory would be created
# directly under the filesystem root.
if [[ -z "${fasta_input_file:-}" || -z "${output_dir:-}" ]]; then
  usage
  echo "Both -i (fasta input file) and -o (output directory) are required. Exiting." >&2
  exit 1
fi

echo "$deep_sim_loc"
echo "$PWD"

# Reads are written to <output_dir>/<input file name without .fasta>.
base=$(basename "$fasta_input_file" .fasta)
if ! mkdir -p "$output_dir/$base"; then
  echo "Could not create output directory $output_dir/$base. Exiting." >&2
  exit 1
fi
conda deactivate
# Select the DeepSimulator entry script from -j:
#   1 -> deep_simulator.sh
#   2 -> deep_simulator_fast5only.sh
case "$j" in
  1)
    sim_script="deep_simulator.sh"
    echo "$output_dir/$base"
    ;;
  2)
    sim_script="deep_simulator_fast5only.sh"
    ;;
  *)
    echo "Exit: -j isn't properly specified as 1 (dont exit after fast5) or 2 (exit after fast5)"
    exit 1
    ;;
esac

# Build the command as an array so paths containing whitespace survive intact.
sim_cmd=(
  "${deep_sim_loc}/${sim_script}"
  -i "$fasta_input_file"
  -n "$read_count"
  -c "$cpu_count"
  -o "$output_dir/$base"
  -B "$guppy_type"
  -H "$deep_sim_loc"
)

# Log the exact command (shell-quoted) before running it.
printf '%q ' "${sim_cmd[@]}"
printf '\n'

bash "${sim_cmd[@]}"

# After the fun is done for the fullDeepSim run

# bash scripts/remapOxfordFastq.sh \
#   -i data/fullDeepSim/metasim-strawman_envassay.tsv/r9/ \
#   -o pass_mapped.fastq && find data/fullDeepSim/metasim-strawman_envassay.tsv/r9/ \
#   -maxdepth 3 \
#   -name "pass_mapped.fastq" \
#   -exec cat {} + > data/fullDeepSim/pass_mapped_merged.fastq