Files
meta-simulator/scripts/remapOxfordFastq.sh
Angeline Aguinaldo db57e4495d initial commit
2020-11-12 20:18:05 -05:00

83 lines
2.7 KiB
Bash

#!/bin/bash
# **********************************************************************
# Copyright (C) 2020 Johns Hopkins University Applied Physics Laboratory
#
# All Rights Reserved.
# For any other permission, please contact the Legal Office at JHU/APL.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# **********************************************************************
# FUNCTIONS
#######################################
# Print the help text for remapOxfordFastq.sh.
# Arguments: none
# Outputs:   the help message, one line per printf argument, on stdout
#######################################
usage() {
  # Single-quoted arguments keep every line literal (nothing is expanded).
  printf '%s\n' \
    'Help message for remapOxfordFastq.sh:' \
    'DESCRIPTION' \
    'NOTES:' \
    '- WARNING:' \
    'USAGE:' \
    'bash scripts/remapOxfordFastq.sh -i data/fullDeepSim/metasim-strawman_envassay.tsv/r9/ -o pass_mapped.fastq' \
    'OPTIONS:' \
    '-h help show this message' \
    '-i FASTQ Input fastq containing directory from sim_module deepsim script run' \
    '-o FASTQ output fastq with mapped (3rd column) as runid' \
    'NOTES:' \
    'This script requires the input to be the metasim output folder at first depth/child (40067, 1282, etc.) where each accession/seq header is a folder within those with the pass.fastq file in that' \
    '____________________________________________________________________________________________________' \
    'References:' \
    '1. O. Tange (2011): GNU Parallel - The Command-Line Power Tool, ;login: The USENIX Magazine, February 2011:42-47.'
}
# ARGUMENTS
#######################################
# Parse the command-line options.
# Globals:   INPUT       (written) value given to -i
#            output_file (written) value given to -o
#            OPTIND      (advanced by getopts)
# Arguments: the script's command-line arguments ("$@")
# Outputs:   help text on stdout for -h; help text on stderr for a bad option
# Exits:     0 after -h (help was requested, not an error);
#            1 on an unknown option or an option missing its argument
#######################################
parse_args() {
  local opt
  while getopts "ho:i:" opt; do
    case "$opt" in
      h)
        usage
        exit 0
        ;;
      o) output_file=$OPTARG ;;
      i) INPUT=$OPTARG ;;
      *)
        # getopts sets opt to '?' (and prints its own diagnostic) for an
        # unknown option or a missing option argument.
        usage >&2
        exit 1
        ;;
    esac
  done
}
parse_args "$@"
# check args
# Each failed check reports on stderr and exits with status 1, so a calling
# script or pipeline can detect the bad invocation (a bare `exit` after a
# successful printf would return 0).
if [[ -z "$INPUT" ]]; then
  printf "%s\n" "Please specify input directory containing fasta file headers as directory with deepsim output (pass.fastq) (-i)." >&2
  exit 1
fi
if [[ -z "$output_file" ]]; then
  printf "%s\n" "Please specify output filename in the same directory as the pass.fastq file (-o)." >&2
  exit 1
fi
# The output is written next to each pass.fastq, so reusing that name would
# make the rewrite read and truncate the same file.
if [[ "$output_file" == "pass.fastq" ]]; then
  printf "%s\n" "Please specify output file name different than the pass.fastq filename." >&2
  exit 1
fi
#######################################
# For every file named pass.fastq found under the input directory (searched
# to a depth of 5), write a copy next to it in which every line whose number
# is 1 modulo 4 is replaced by "@<parent-dir-name>_<n>", where n counts those
# lines from 1 within that file. All other lines are copied unchanged.
# (Assumes 4-line FASTQ records, so the replaced lines are the read headers.)
# Arguments: $1 - directory to search
#            $2 - name of the output file created beside each pass.fastq
# Outputs:   writes <dir-of-pass.fastq>/<$2> for each match
# Returns:   0 on success; 1 if an argument is empty or any rewrite failed
#######################################
remap_fastq_headers() {
  local input_dir=$1 out_name=$2
  local file dir base_dir rc=0

  if [[ -z "$input_dir" || -z "$out_name" ]]; then
    printf '%s\n' "remap_fastq_headers: input directory and output file name are required." >&2
    return 1
  fi

  # NUL-delimited find output keeps paths containing spaces, tabs, newlines
  # or glob characters intact.
  while IFS= read -r -d '' file; do
    dir=$(dirname -- "$file")
    base_dir=$(basename -- "$dir")
    # Read via redirection so awk never interprets the path as an option or
    # as a var=value assignment operand.
    awk -v name="$base_dir" '
      NR % 4 == 1 { print "@" name "_" (++n); next }
      { print }
    ' < "$file" > "${dir}/${out_name}" || rc=1
  done < <(find "$input_dir" -maxdepth 5 -name "pass.fastq" -print0)

  return "$rc"
}
remap_fastq_headers "$INPUT" "$output_file"