JRaviLab · the-mayer · Oct 29, 2024 · Oct 10, 2024 · Oct 10, 2024 · Oct 22, 2024
diff --git a/NAMESPACE b/NAMESPACE
@@ -4,7 +4,6 @@ export(GCA2Lineage)
 export(IPG2Lineage)
 export(acc2FA)
 export(acc2Lineage)
-export(acc2fa)
 export(addLeaves2Alignment)
 export(addLineage)
 export(addName)

diff --git a/R/CHANGED-pre-msa-tree.R b/R/CHANGED-pre-msa-tree.R
@@ -40,10 +40,14 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE")
 #' @param y Delimiter. Default is space (" ").
 #' @seealso chartr, toupper, and tolower.
 #'
-#' @return
+#' @return Character vector with the input strings converted to title case.
+#'
 #' @export
 #'
 #' @examples
+#' # Convert a single string to title case
+#' convert2TitleCase("hello world") # Returns "Hello World"
+#' 
 convert2TitleCase <- function(x, y = " ") {
     s <- strsplit(x, y)[[1]]
     paste(toupper(substring(s, 1, 1)), substring(s, 2),
@@ -76,7 +80,8 @@ convert2TitleCase <- function(x, y = " ") {
 #' @importFrom stringr str_sub
 #' @importFrom tidyr replace_na separate
 #'
-#' @return
+#' @return A data frame containing the enriched alignment data with lineage 
+#' information.
 #'
 #' @details The alignment file would need two columns: 1. accession
 #' number and 2. alignment. The protein homolog accession to lineage mapping
@@ -203,6 +208,14 @@ addLeaves2Alignment <- function(aln_file = "",
 #' @export
 #'
 #' @examples
+#' # Example usage of the addName function
+#' data <- data.frame(
+#'   AccNum = c("ACC123", "ACC456"),
+#'   Species = c("Homo sapiens", "Mus musculus"),
+#'   Lineage = c("Eukaryota>Chordata", "Eukaryota>Chordata")
+#' )
+#' enriched_data <- addName(data)
+#' print(enriched_data)
 addName <- function(data,
     accnum_col = "AccNum", spec_col = "Species", lin_col = "Lineage",
     lin_sep = ">", out_col = "Name") {
@@ -278,7 +291,9 @@ addName <- function(data,
 #' @note Please refer to the source code if you have alternate
 #' file formats and/or column names.
 #'
-#' @return
+#' @return A character string representing the FASTA formatted sequences.
+#' If `fa_outpath` is provided, the FASTA will also be saved to the specified 
+#' file.
 #' @export
 #'
 #' @examples
@@ -326,18 +341,24 @@ convertAlignment2FA <- function(aln_file = "",
 #' Default renameFA() replacement function. Maps an accession number to its name
 #'
 #' @param line The line of a fasta file starting with '>'
-#' @param acc2name Data Table containing a column of accession numbers and a name column
+#' @param acc2name Data Table containing a column of accession numbers and a 
+#' name column
 #' @param acc_col Name of the column containing Accession numbers
-#' @param name_col Name of the column containing the names that the accession numbers
+#' @param name_col Name of the column containing the names that the accession 
+#' numbers
 #' are mapped to
 #'
 #' @importFrom dplyr filter pull
 #' @importFrom rlang sym
 #'
-#' @return
+#' @return A character string representing the updated FASTA line, where the 
+#' accession number is replaced with its corresponding name.
 #' @export
 #'
 #' @examples
+#' \dontrun{
+#' mapAcc2Name(">P12345 some description", acc2name, "AccNum", "Name")
+#' }
 mapAcc2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") {
     # change to be the name equivalent to an addNames column
     # Find the first ' '
@@ -363,10 +384,14 @@ mapAcc2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") {
 #' @importFrom purrr map
 #' @importFrom readr read_lines write_lines
 #'
-#' @return
+#' @return A character vector of the modified lines in the FASTA file.
 #' @export
 #'
 #' @examples
+#' \dontrun{
+#' renameFA("path/to/input.fasta", 
+#' "path/to/output.fasta", mapAcc2Name, acc2name)
+#' }
 renameFA <- function(fa_path, outpath,
     replacement_function = mapAcc2Name, ...) {
     lines <- read_lines(fa_path)
@@ -395,20 +420,26 @@ renameFA <- function(fa_path, outpath,
 #'
 #' @param aln_path Character. Path to alignment files.
 #' Default is 'here("data/rawdata_aln/")'
-#' @param fa_outpath Character. Path to file. Master protein file with AccNum & lineages.
+#' @param fa_outpath Character. Path to the directory that the FASTA files
+#' are written to.
 #' Default is 'here("data/alns/")'.
 #' @param lin_file Character. Path to file. Master protein file with AccNum &
 #' lineages. Default is 'here("data/rawdata_tsv/all_semiclean.txt")'
-#' @param reduced Boolean. If TRUE, the fasta file will contain only one sequence per lineage.
+#' @param reduced Boolean. If TRUE, the fasta file will contain only one 
+#' sequence per lineage.
 #' Default is 'FALSE'.
 #'
 #' @importFrom purrr pmap
 #' @importFrom stringr str_replace_all
 #'
-#' @return
+#' @return NULL. The function saves the output FASTA files to the specified 
+#' directory.
 #'
-#' @details The alignment files would need two columns separated by spaces: 1. AccNum and 2. alignment. The protein homolog file should have AccNum, Species, Lineages.
-#' @note Please refer to the source code if you have alternate + file formats and/or column names.
+#' @details The alignment files would need two columns separated by spaces: 
+#' 1. AccNum and 2. alignment. The protein homolog file should have AccNum, 
+#' Species, Lineages.
+#' @note Please refer to the source code if you have alternate file formats
+#' and/or column names.
 #'
 #' @export
 #'
@@ -456,24 +487,29 @@ generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"),
 #' @author Samuel Chen, Janani Ravi
 #' @keywords accnum, fasta
 #'
-#' @param accessions  Character vector containing protein accession numbers to generate fasta sequences for.
+#' @param accessions Character vector containing protein accession numbers to
+#' generate fasta sequences for.
 #' Function may not work for vectors of length > 10,000
 #' @param outpath [str] Location where fasta file should be written to.
-#' @param plan
+#' @param plan Character string specifying the parallel processing strategy to 
+#' use with the `future` package. Default is "sequential".
 #'
 #' @importFrom Biostrings readAAStringSet
 #' @importFrom future future plan value
 #' @importFrom purrr map
 #' @importFrom rentrez entrez_fetch
 #'
-#' @return
+#' @return A logical value indicating whether the retrieval and conversion were 
+#' successful. Returns `TRUE` if successful and `FALSE` otherwise.
 #' @export
 #'
 #' @examples
 #' \dontrun{
-#' acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta")
+#' acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), 
+#' outpath = "my_proteins.fasta")
 #' rep("ANY95992.1", 201) |> acc2FA(outpath = "entrez.fa") # Entrez
-#' EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2FA(outpath = "ebi.fa")
+#' c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> # EBI
+#'   acc2FA(outpath = "ebi.fa")
 #' }
 acc2FA <- function(accessions, outpath, plan = "sequential") {
     # validation
@@ -549,7 +585,8 @@ acc2FA <- function(accessions, outpath, plan = "sequential") {
 #' createRepresentativeAccNum
 #' 
 #' @description
-#' Function to generate a vector of one Accession number per distinct observation from 'reduced' column
+#' Function to generate a vector of one Accession number per distinct 
+#' observation from 'reduced' column
 #'
 #' @author Samuel Chen, Janani Ravi
 #'
@@ -562,14 +599,16 @@ acc2FA <- function(accessions, outpath, plan = "sequential") {
 #' @importFrom dplyr filter pull
 #' @importFrom rlang sym
 #'
-#' @return
+#' @return A character vector containing one Accession number per distinct 
+#' observation from the specified reduced column.
 #' @export
 #'
 #' @examples
 createRepresentativeAccNum <- function(prot_data,
     reduced = "Lineage",
     accnum_col = "AccNum") {
-    # Get Unique reduced column and then bind the AccNums back to get one AccNum per reduced column
+    # Get Unique reduced column and then bind the AccNums back to get one 
+    # AccNum per reduced column
     reduced_sym <- sym(reduced)
     accnum_sym <- sym(accnum_col)
 
@@ -603,8 +642,10 @@ createRepresentativeAccNum <- function(prot_data,
 #' @author Samuel Chen, Janani Ravi
 #'
 #' @param fasta_file Path to the FASTA file to be aligned
-#' @param tool Type of alignment tool to use. One of three options: "Muscle", "ClustalO", or "ClustalW"
-#' @param outpath Path to write the resulting alignment to as a FASTA file. If NULL, no file is written
+#' @param tool Type of alignment tool to use. One of three options: "Muscle", 
+#' "ClustalO", or "ClustalW"
+#' @param outpath Path to write the resulting alignment to as a FASTA file. 
+#' If NULL, no file is written
 #'
 #' @importFrom Biostrings readAAStringSet
 #' @importFrom msa msaClustalOmega msaMuscle msaClustalW
@@ -613,6 +654,10 @@ createRepresentativeAccNum <- function(prot_data,
 #' @export
 #'
 #' @examples
+#' \dontrun{
+#' aligned_sequences <- alignFasta("my_sequences.fasta", 
+#' tool = "Muscle", outpath = "aligned_output.fasta")
+#' }
 alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) {
     fasta <- readAAStringSet(fasta_file)
 
@@ -641,7 +686,7 @@ alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) {
 #' @importFrom Biostrings toString unmasked
 #' @importFrom readr write_file
 #'
-#' @return
+#' @return Character string representing the content of the written FASTA file.
 #' @export
 #'
 #' @examples
@@ -660,11 +705,12 @@ writeMSA_AA2FA <- function(alignment, outpath) {
 
 #' getAccNumFromFA
 #'
-#' @param fasta_file
+#' @param fasta_file Character. The path to the FASTA file from which 
+#' accession numbers will be extracted.
 #'
 #' @importFrom stringi stri_extract_all_regex
 #'
-#' @return
+#' @return A character vector containing the extracted accession numbers.
 #' @export
 #'
 #' @examples

diff --git a/R/blastWrappers.R b/R/blastWrappers.R
@@ -3,17 +3,22 @@
 #' Run DELTABLAST to find homologs for proteins of interest
 #'
 #' @author Samuel Chen, Janani Ravi
+#' @description
+#' This function executes a Delta-BLAST search using the specified parameters 
+#' and database. It sets the BLAST database path, runs the Delta-BLAST command 
+#' with the given query, and outputs the results.
 #'
-#' @param deltablast_path
-#' @param db_search_path Path to the BLAST databases
-#' @param db
-#' @param query
-#' @param evalue
-#' @param out
-#' @param num_alignments
-#' @param num_threads
+#' @param deltablast_path Path to the Delta-BLAST executable.
+#' @param db_search_path Path to the BLAST databases.
+#' @param db Name of the BLAST database to search against (default is "refseq").
+#' @param query Path to the input query file.
+#' @param evalue E-value threshold for reporting matches (default is "1e-5").
+#' @param out Path to the output file where results will be saved.
+#' @param num_alignments Number of alignments to report.
+#' @param num_threads Number of threads to use for the search (default is 1).
 #'
-#' @return
+#' @return This function does not return a value; it outputs results to the 
+#' specified file.
 #' @export
 #'
 #' @examples
@@ -43,18 +48,27 @@ runDeltaBlast <- function(deltablast_path, db_search_path,
 
 #' Run RPSBLAST to generate domain architectures for proteins of interest
 #'
-#' @param rpsblast_path
-#' @param db_search_path Path to the BLAST databases
-#' @param db
-#' @param query
-#' @param evalue
-#' @param out
-#' @param num_threads
+#' @description
+#' This function executes an RPS-BLAST search to generate domain architectures 
+#' for specified proteins. It sets the BLAST database path, runs the RPS-BLAST 
+#' command with the provided query, and outputs the results.
 #'
-#' @return
+#' @param rpsblast_path Path to the RPS-BLAST executable.
+#' @param db_search_path Path to the BLAST databases.
+#' @param db Name of the BLAST database to search against (default is "refseq").
+#' @param query Path to the input query file.
+#' @param evalue E-value threshold for reporting matches (default is "1e-5").
+#' @param out Path to the output file where results will be saved.
+#' @param num_threads Number of threads to use for the search (default is 1).
+#'
+#' @return This function does not return a value; it outputs results to the 
+#' specified file.
 #' @export
 #'
 #' @examples
+#' \dontrun{
+#' runRPSBlast(rpsblast_path, db_search_path, query = query, out = out)
+#' }
 runRPSBlast <- function(rpsblast_path, db_search_path,
     db = "refseq", query, evalue = "1e-5",
     out, num_threads = 1) {