Output is written to the same folder containing the input files.
blast_n_list(fasta_folder, fasta_pattern, database_path, out_ext = "tsv", outfmt = "6", other_args = NULL, overwrite = FALSE, echo = FALSE, get_hash = TRUE, ...)
| Argument | Description |
|---|---|
| fasta_folder | Path to the folder containing fasta files to BLAST. |
| fasta_pattern | Optional; pattern used for matching with grep. Only files with names matching the pattern will be included in the BLAST search. |
| database_path | Path to the BLAST database, including the database name. |
| out_ext | File extension used for BLAST results files. The result of each BLAST search will be a file with the same name as the input fasta files, but with this extension appended. |
| outfmt | String; format to use for BLAST output. See https://www.ncbi.nlm.nih.gov/books/NBK279684/ (Table C1) for details. |
| other_args | Character vector; other arguments to pass on to `blastn`. |
| overwrite | Logical; should old output be erased before running this function? "Old output" is identified as any file whose name matches `out_ext`. |
| echo | Logical; should standard error and output be printed? |
| get_hash | Logical; if TRUE, the MD5 hash of the output will be returned. |
| ... | Additional other arguments. Not used by this function, but meant to be used by `drake::drake_plan` for tracking during workflows. |
NULL, or a character vector (the MD5 hash of the output) if `get_hash` is TRUE. Externally, a text file with the results of the blastn search, named by adding `out_ext` to each input fasta file name.
https://www.ncbi.nlm.nih.gov/books/NBK279690/
library(ape)

# Make temp dir for storing files
temp_dir <- fs::dir_create(fs::path(tempdir(), "baitfindR_example"))

# Write out ape::woodmouse dataset as DNA
data(woodmouse)
ape::write.FASTA(woodmouse, fs::path(temp_dir, "woodmouse.fasta"))
ape::write.FASTA(woodmouse, fs::path(temp_dir, "woodmouse2.fasta"))

# Make blast database
build_blast_db(
  fs::path(temp_dir, "woodmouse.fasta"),
  db_type = "nucl",
  out_name = "wood",
  parse_seqids = TRUE,
  wd = temp_dir)
#>
#>
#> Building a new DB, current time: 05/15/2019 16:40:37
#> New DB name: /tmp/RtmpeNC9nF/baitfindR_example/wood
#> New DB title: /tmp/RtmpeNC9nF/baitfindR_example/woodmouse.fasta
#> Sequence type: Nucleotide
#> Keep MBits: T
#> Maximum file size: 1000000000B
#> Adding sequences from FASTA; added 15 sequences in 0.0124049 seconds.
#> $status
#> [1] 0
#>
#> $stdout
#> [1] "\n\nBuilding a new DB, current time: 05/15/2019 16:40:37\nNew DB name: /tmp/RtmpeNC9nF/baitfindR_example/wood\nNew DB title: /tmp/RtmpeNC9nF/baitfindR_example/woodmouse.fasta\nSequence type: Nucleotide\nKeep MBits: T\nMaximum file size: 1000000000B\nAdding sequences from FASTA; added 15 sequences in 0.0124049 seconds.\n"
#>
#> $stderr
#> [1] ""
#>
#> $timeout
#> [1] FALSE
#>

# Blast the original sequences against the database
blast_n_list(
  fasta_folder = temp_dir,
  fasta_pattern = "fasta",
  database_path = fs::path(temp_dir, "wood")
)
#> [1] "c90963e4b507281024a89cbb54d8074f"

# Take a look at the results.
readr::read_tsv(
  fs::path(temp_dir, "woodmouse.tsv"),
  col_names = FALSE
)
#>
#>
#>
#> # A tibble: 225 x 12
#>    X1    X2         X3    X4    X5    X6    X7    X8    X9   X10   X11   X12
#>    <chr> <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 No305 No305   100     964     0     0     2   965     2   965     0  1773
#>  2 No305 No1103S  98.5   960    14     0     2   961     2   961     0  1700
#>  3 No305 No306    98.2   964    17     0     2   965     2   965     0  1694
#>  4 No305 No0912S  98.3   960    16     0     2   961     2   961     0  1688
#>  5 No305 No1206S  98.1   960    18     0     2   961     2   961     0  1677
#>  6 No305 No1202S  98.1   960    18     0     2   961     2   961     0  1677
#>  7 No305 No1007S  98.1   960    18     0     2   961     2   961     0  1677
#>  8 No305 No0909S  98.1   960    18     0     2   961     2   961     0  1677
#>  9 No305 No0908S  98.1   960    18     0     2   961     2   961     0  1677
#> 10 No305 No304    98.0   961    19     0     2   962     2   962     0  1677
#> # … with 215 more rows
#>
#>
#>   col_character(),
#>   col_character(),
#>   col_double(),
#>   col_double(),
#>   col_double(),
#>   col_double(),
#>   col_double(),
#>   col_double(),
#>   col_double(),
#>   col_double(),
#>   col_double(),
#>   col_double()
#>
#> # A tibble: 225 x 12
#>    X1    X2         X3    X4    X5    X6    X7    X8    X9   X10   X11   X12
#>    <chr> <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 No305 No305   100     964     0     0     2   965     2   965     0  1773
#>  2 No305 No1103S  98.5   960    14     0     2   961     2   961     0  1700
#>  3 No305 No306    98.2   964    17     0     2   965     2   965     0  1694
#>  4 No305 No0912S  98.3   960    16     0     2   961     2   961     0  1688
#>  5 No305 No1206S  98.1   960    18     0     2   961     2   961     0  1677
#>  6 No305 No1202S  98.1   960    18     0     2   961     2   961     0  1677
#>  7 No305 No1007S  98.1   960    18     0     2   961     2   961     0  1677
#>  8 No305 No0909S  98.1   960    18     0     2   961     2   961     0  1677
#>  9 No305 No0908S  98.1   960    18     0     2   961     2   961     0  1677
#> 10 No305 No304    98.0   961    19     0     2   962     2   962     0  1677
#> # … with 215 more rows