% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Lineage.R
\name{readIgphyml}
\alias{readIgphyml}
\title{Read in output from IgPhyML}
\usage{
readIgphyml(
  file,
  id = NULL,
  format = c("graph", "phylo"),
  collapse = FALSE,
  branches = c("mutations", "distance")
)
}
\arguments{
\item{file}{IgPhyML output file (.tab).}

\item{id}{ID to assign to output object.}

\item{format}{if \code{"graph"} return trees as igraph \code{graph} objects. 
If \code{"phylo"} return trees as ape \code{phylo} objects.}

\item{collapse}{if \code{TRUE} transform branch lengths to units of substitutions, 
rather than substitutions per site, and collapse internal nodes
separated by branches < 0.1 substitutions. Will also remove all
internal node labels, as it makes them inconsistent.}

\item{branches}{if \code{"distance"} branch lengths are in expected mutations per
site. If \code{"mutations"} branches are in expected mutations.}
}
\value{
A list containing IgPhyML model parameters and estimated lineage trees. 
          
          Object attributes:
          \itemize{
            \item  \code{param}:     Data.frame of parameter estimates for each clonal 
                                     lineage. Columns include: \code{CLONE}, which is the 
                                     clone id; \code{NSEQ}, the total number of sequences in 
                                     the lineage; \code{NSITE}, the number of codon sites;
                                     \code{TREE_LENGTH}, the sum of all branch lengths in 
                                     the estimated lineage tree; and \code{LHOOD}, the log 
                                     likelihood of the clone's sequences given the tree and
                                     parameters. Subsequent columns are parameter estimates 
                                     from IgPhyML, which will depend on the model used. 
                                     Parameter columns ending with \code{_MLE} are maximum 
                                     likelihood estimates; those ending with \code{_LCI} are 
                                     the lower 95\% confidence interval estimate; those ending 
                                     with \code{_UCI} are the upper 95\% confidence interval 
                                     estimate. The first line of \code{param} is for clone 
                                     \code{REPERTOIRE}, 
                                     which is a summary of all lineages within the repertoire.
                                     For this row, \code{NSEQ} is the total number of sequences, 
                                     \code{NSITE} is the average number of sites, and
                                     \code{TREE_LENGTH} is the mean tree length. For most 
                                     applications, parameter values will be the same for all 
                                     lineages within the repertoire, so access them simply by:
                                     \code{<object>$param$OMEGA_CDR_MLE[1]} to, for instance,
                                     get the estimate of dN/dS on the CDRs at the repertoire level.
            \item  \code{trees}:     List of tree objects estimated by IgPhyML. If 
                                     \code{format="graph"} these are igraph \code{graph} objects. 
                                     If \code{format="phylo"}, these are ape \code{phylo} objects.
            \item  \code{command}:   Command used to run IgPhyML.
          }
}
\description{
\code{readIgphyml} reads output from the IgPhyML phylogenetics inference package for 
B cell repertoires.
}
\details{
\code{readIgphyml} reads output from the IgPhyML repertoire phylogenetics inference package. 
The resulting object is divided between parameter estimates (usually under the HLP19 model),
which provide information about mutation and selection pressure operating on the sequences,
and the estimated lineage trees.

Trees returned from this function are either igraph objects or phylo objects, and each may be 
visualized accordingly. Further, branch lengths in the trees may represent either the expected
number of substitutions per site (codon, if estimated under HLP or GY94 models), or the total
number of expected substitutions. If the latter, internal nodes - but not tips - separated by
branch lengths less than 0.1 are collapsed to simplify viewing.
}
\examples{
\dontrun{
   # Read in and plot a tree from an igphyml run
   library(igraph)
   s1 <- readIgphyml("IB+7d_lineages_gy.tsv_igphyml_stats_hlp.tab", id="+7d")
   print(s1$param$OMEGA_CDR_MLE[1])
   plot(s1$trees[[1]], layout=layout_as_tree, edge.label=E(s1$trees[[1]])$weight)
}

}
\references{
\enumerate{
  \item  Hoehn KB, Lunter G, Pybus OG - A Phylogenetic Codon Substitution Model for Antibody 
             Lineages. Genetics 2017 206(1):417-427
             https://doi.org/10.1534/genetics.116.196303 
 \item  Hoehn KB, Vander Heiden JA, Zhou JQ, Lunter G, Pybus OG, Kleinstein SH - 
             Repertoire-wide phylogenetic models of B cell molecular evolution reveal 
             evolutionary signatures of aging and vaccination. bioRxiv 2019  
             https://doi.org/10.1101/558825 
}
}
