Realign the top BLAST hit of a multi-fasta file with that fasta file.

Top hits and original fasta files are matched based on the first part of the filename separated by periods (i.e., the filename without any extension).

realign_with_best_hits(best_hits_dir, best_hits_pattern = "bestmatch",
  fasta_dir, fasta_pattern = "\\.fa$", ...)

Arguments

best_hits_dir	Path to directory containing top blast hits.
best_hits_pattern	Pattern used for matching with grep. Only files with names matching the pattern will be included as the top blast hit.
fasta_dir	Path to directory containing fasta files for realignment.
fasta_pattern	Pattern used for matching with grep. Only files with names matching the pattern will be included for realignment.
...	Additional arguments. Not used by this function, but meant to be used by `drake_plan` for tracking during workflows.

Value

Named list with one element per fasta file, each of which is an alignment of class `DNAbin`.

Examples


library(ape)

# Make temp dir for storing files
temp_dir <- fs::dir_create(fs::path(tempdir(), "baitfindR_example"))

# Write out the ape::woodmouse dataset as two fasta files
data(woodmouse)
ape::write.FASTA(woodmouse, fs::path(temp_dir, "woodmouse.fasta"))
ape::write.FASTA(woodmouse, fs::path(temp_dir, "woodmouse2.fasta"))

# Make blast database
build_blast_db(
  fs::path(temp_dir, "woodmouse.fasta"),
  db_type = "nucl",
  out_name = "wood",
  parse_seqids = TRUE,
  wd = temp_dir)
#> 
#> 
#> Building a new DB, current time: 05/15/2019 16:40:51
#> New DB name:   /tmp/RtmpeNC9nF/baitfindR_example/wood
#> New DB title:  /tmp/RtmpeNC9nF/baitfindR_example/woodmouse.fasta
#> Sequence type: Nucleotide
#> Keep MBits: T
#> Maximum file size: 1000000000B
#> Adding sequences from FASTA; added 15 sequences in 0.018368 seconds.
#> $status
#> [1] 0
#> 
#> $stdout
#> [1] "\n\nBuilding a new DB, current time: 05/15/2019 16:40:51\nNew DB name:   /tmp/RtmpeNC9nF/baitfindR_example/wood\nNew DB title:  /tmp/RtmpeNC9nF/baitfindR_example/woodmouse.fasta\nSequence type: Nucleotide\nKeep MBits: T\nMaximum file size: 1000000000B\nAdding sequences from FASTA; added 15 sequences in 0.018368 seconds.\n"
#> 
#> $stderr
#> [1] ""
#> 
#> $timeout
#> [1] FALSE
#> 

# Blast the original sequences against the database
blast_n_list(
  fasta_folder = temp_dir,
  fasta_pattern = "fasta",
  database_path = fs::path(temp_dir, "wood")
)
#> [1] "c90963e4b507281024a89cbb54d8074f"

# Extract the top BLAST hit for each fasta file.
extract_blast_hits(
  blast_results_dir = temp_dir,
  blast_results_pattern = "\\.tsv$",
  database_path = fs::path(temp_dir, "wood"),
  out_dir = temp_dir,
  out_ext = "bestmatch"
)
#> Parsed with column specification:
#> cols(
#>   qseqid = col_character(),
#>   sseqid = col_character(),
#>   pident = col_double(),
#>   length = col_double(),
#>   mismatch = col_double(),
#>   gapopen = col_double(),
#>   qstart = col_double(),
#>   qend = col_double(),
#>   sstart = col_double(),
#>   send = col_double(),
#>   evalue = col_double(),
#>   bitscore = col_double()
#> )
#> Parsed with column specification:
#> cols(
#>   qseqid = col_character(),
#>   sseqid = col_character(),
#>   pident = col_double(),
#>   length = col_double(),
#>   mismatch = col_double(),
#>   gapopen = col_double(),
#>   qstart = col_double(),
#>   qend = col_double(),
#>   sstart = col_double(),
#>   send = col_double(),
#>   evalue = col_double(),
#>   bitscore = col_double()
#> )
#> [1] TRUE

realign_with_best_hits(
  best_hits_dir = temp_dir,
  best_hits_pattern = "bestmatch",
  fasta_dir = temp_dir,
  fasta_pattern = "fasta"
)
#> $woodmouse
#> 16 DNA sequences in binary format stored in a matrix.
#> 
#> All sequences of same length: 965 
#> 
#> Labels:
#> No305
#> No304
#> No306
#> No0906S
#> No0908S
#> No0909S
#> ...
#> 
#> Base composition:
#>     a     c     g     t 
#> 0.307 0.261 0.126 0.306 
#> (Total: 15.44 kb)
#> 
#> $woodmouse2
#> 16 DNA sequences in binary format stored in a matrix.
#> 
#> All sequences of same length: 965 
#> 
#> Labels:
#> No305
#> No304
#> No306
#> No0906S
#> No0908S
#> No0909S
#> ...
#> 
#> Base composition:
#>     a     c     g     t 
#> 0.307 0.261 0.126 0.306 
#> (Total: 15.44 kb)
#> 

# Cleanup.
fs::file_delete(temp_dir)

Realign the top BLAST hit of a multi-fasta file with that fasta file.

Arguments

Value

Examples

Contents