mosaic/000755 000766 000024 00000000000 11334407734 012266 5ustar00lkmstaff000000 000000 mosaic/data/000755 000766 000024 00000000000 11332660372 013174 5ustar00lkmstaff000000 000000 mosaic/DESCRIPTION000644 000766 000024 00000001005 11334407737 013773 0ustar00lkmstaff000000 000000 Package: mosaic Title: Mosaic: calculates the 'mosaicity' of a one dimensional hybrid zone Version: 0.1-1 Date: 2010-02-09 Author: Leithen K. M'Gonigle and Richard G. FitzJohn Maintainer: Leithen K. M'Gonigle Depends: R (>= 2.4) Description: This package uses likelihood to fit step-wise models to one dimensional hybrid zone data, and to estimate the 'mosaicity' of the hybrid zone. License: GPL (>=2) URL: http://www.zoology.ubc.ca/prog/mosaic Packaged: 2010-02-10 01:46:07 UTC; lkm mosaic/man/000755 000766 000024 00000000000 11334372315 013035 5ustar00lkmstaff000000 000000 mosaic/NAMESPACE000644 000766 000024 00000000153 11334370665 013506 0ustar00lkmstaff000000 000000 export(make.mosaic.logLik) export(fit.mosaic) export(plotMosaic) export(mosaicity) S3method(plot, mosaic) mosaic/R/000755 000766 000024 00000000000 11334367412 012465 5ustar00lkmstaff000000 000000 mosaic/R/fit.mosaic.R000755 000766 000024 00000005555 11334367253 014664 0ustar00lkmstaff000000 000000 ## a modified version of all.models that stops searching for models of ## higher numbers of steps once they are no longer significant given a ## p value of 0.05 and significance level of sig.level (using a chi ## square test with 1 degree of freedom to test for significance) fit.mosaic <- function(data, sig.level, N=50, ngens=100, s=5, p.mutate=0.1, p.sex=1, p.rec=0.25, verbose=TRUE) { ## Run a model with k steps until we return a likelihood that is ## better than 'target' next.model <- function(k) { MAXREPS <- 3 for ( i in 1:MAXREPS ) { ans <- mosaic(data, logLik, k, N, ngens, s, p.mutate, p.sex, p.rec, verbose=verbose) if ( (ans$best.ll - res[[k-1]]$best.ll) > -0.001 ) return(ans) else if ( verbose > 1 ) cat("oops! failed to converge...\n") } cat("Failed to find a better model; probably rounding error\n") ans } if ( verbose ) cat("Preparing likelihood function\n") logLik <- make.mosaic.logLik(data) chiSquareValue <- qchisq(sig.level, df=2, lower.tail=FALSE)/2 n.patches <- nrow(data) n.partitions <- seq_len(n.patches-1) res <- vector("list", n.patches-1) for ( i in n.partitions ) { if ( i == 1 ) res[[1]] <- mosaic(data, logLik, i, N, ngens, s, p.mutate, p.sex, p.rec, verbose=verbose) else { res[[i]] <- next.model(i) is.signif <- res[[i]]$best.ll-res[[i-1]]$best.ll > chiSquareValue if ( !is.signif ) { if ( verbose > 1 ) cat("Second attempt; have we topped out?\n") res2 <- next.model(i) if ( res2$best.ll - res[[i-1]]$best.ll > chiSquareValue ) res[[i]] <- res2 else { if ( verbose > 1 ) cat(sprintf("\nFitted %d/%d steps.\n", i, n.patches)) break } } } } ret <- res[1:(i-1)] attr(ret, "data") <- data class(ret) <- "mosaic" ret } ## Computes mosaic statistic for a given number of partitions. mosaic <- function(data, logLik, n.partitions, N, ngens, s, p.mutate, p.sex, p.rec, max.exhaust=N*ngens*2, verbose=TRUE) { n.patches <- nrow(data) n.exhaust <- choose(n.patches-1, n.partitions) if ( n.exhaust > max.exhaust ) { if ( verbose ) cat(sprintf("GA: (%d/%d)\n", n.partitions, n.patches)) mosaic.ga(data, ngens, n.partitions, N, s, p.mutate, p.sex, p.rec, logLik) } else { if ( verbose ) cat(sprintf("Exhaust: (%d/%d)\n", n.partitions, n.patches)) mosaic.exhaust(data, n.partitions, logLik) } } ## Exhaustively search for n.partitions mosaic.exhaust <- function(data, n.partitions, logLik) { n.patches <- nrow(data) at <- combn(n.patches-1, n.partitions, simplify=FALSE) ll <- sapply(at, logLik) i <- which.max(ll) list(best.ll=ll[i], best.model=at[[i]], best.ll.t=ll[i]) } mosaic/R/ga.R000755 000766 000024 00000012033 11332646734 013206 0ustar00lkmstaff000000 000000 #### (1) Initial state ## initial.popn randomly generates N solutions for n.patches patches ## divided into n.partitions partitions. The function logLik function ## computes the log likelihood given a vector of partitions. initial.popn <- function(N, n.partitions, n.patches, logLik) { popn <- lapply(seq_len(N), function(x) sort(sample(n.patches-1, n.partitions))) w <- sapply(popn, logLik) list(popn=popn, w=w, best.w=max(w), best.model=popn[[which.max(w)]]) } #### (2) Mutation ## Next, take a population 'popn', with a fitness vector 'w' (where ## the ith element in 'w' gives the fitness of the ith model in ## 'popn'). ## ## First, draw binomial random deviates to determine how many ## mutations will happen per model (mutations happen at an individual ## partition with probability 'p.mutate', and there are ## 'n.partitions'=length(popn[[1]]) partitions available for ## mutation). ## ## Mutate the models that had at least one mutation, by passing them ## through to 'mutate'. ## ## Only mutated models need their fitness recalculated. ## ## Finally, return a list with the new population of models and their ## fitness vector. mutate <- function(popn, w, n.patches, p.mutate, logLik) { nmutants <- rbinom(length(popn), length(popn[[1]]), p.mutate) mutated <- which(nmutants>0) if ( length(mutated) > 0 ) { popn[mutated] <- mapply(mutate1, popn[mutated], nmutants[mutated], n.patches, SIMPLIFY=FALSE) w[mutated] <- sapply(popn[mutated], logLik) } list(popn=popn, w=w) } ## Take a model 'x', describing a hybrid zone of up to 'n.patches' ## patches, mutate 'n.mutants' of the steps. To do this, draw from ## all possible step locations (on 1:(n.patches-1)) without the steps ## that are in use by non mutated steps (i.e. a step can mutate to ## itself). mutate1 <- function(x, n.mutants, n.patches) { mutated <- sort(sample(length(x), n.mutants)) x[mutated] <- sample(setdiff(seq_len(n.patches-1), x[-mutated]), n.mutants) sort(x) } #### (3) Selection: ## This selection regime is fairly straightforward; sample with ## replacement s*N models from the population, then take the N best ## models. select <- function(popn, w, s) { N <- length(popn) i <- sample(N, ceiling(s*N), TRUE) take <- i[order(w[i], decreasing=TRUE)[1:N]] list(popn=popn[take], w=w[take]) } select.weighted <- function(popn, w) { N <- length(popn) take <- sample(N, N, TRUE, w-min(w)+1) list(popn=popn[take], w=w[take]) } #### (4) Recombiation ## Select a fraction of the population to be recombined. These are ## recombined with randomly selected individuals in the previous ## generation (popn0). The fitness of the recombinants is then ## calculated. recombine <- function(state1, popn0, p.sex, p.rec, n.patches, logLik) { popn1 <- state1$popn w1 <- state1$w N <- length(popn1) nrec <- rbinom(1, N, p.sex) i <- sample(N, nrec) if ( length(i) > 0 ) { b <- popn0[sample(N, nrec, TRUE)] popn1[i] <- mapply(recombine1, popn1[i], b, p=p.rec, n.patches=n.patches, SIMPLIFY=FALSE) w1[i] <- sapply(popn1[i], logLik) } list(popn=popn1, w=w1) } ## Recombination function: recombine1 <- function(a, b, p, n.patches) { a0 <- a rec <- runif(length(a)) < p if ( any(rec) ) { avail <- setdiff(b, a[!rec]) if ( length(avail) == 1 ) a[rec] <- avail else a[rec] <- sample(avail, sum(rec)) sort(a) } else { a } } #### (5) Step the population through one full generation: step <- function(state, s, n.patches, p.mutate, p.sex, p.rec, logLik) { ## Mutation state1 <- update.state(state, mutate(state$popn, state$w, n.patches, p.mutate, logLik)) ## Selection state2 <- select(state1$popn, state1$w, s) ## Recombination update.state(state1, recombine(state2, state1$popn, p.sex, p.rec, n.patches, logLik)) } ## Bookkeeping to make sure that we keep the best model update.state <- function(state0, state1) { if ( max(state1$w) > state0$best.w ) { state1$best.w <- max(state1$w) state1$best.model <- state1$popn[[which.max(state1$w)]] } else { state1$best.w <- state0$best.w state1$best.model <- state0$best.model } state1 } ## Run the GA for ngens generations, collecting the best fitness over ## time, and returning the state and best model. run <- function(x, ngens, s, n.patches, p.mutate, p.sex, p.rec, logLik) { out <- vector("numeric", ngens) for ( i in seq_len(ngens) ){ x <- step(x, s, n.patches, p.mutate, p.sex, p.rec, logLik) out[i] <- x$best.w } list(best.ll=x$best.w, best.model=x$best.model, best.ll.t=out) } ## Wrapper around run(), that generates the initial population and the ## logLik function. mosaic.ga <- function(data, ngens, n.partitions, N, s, p.mutate, p.sex, p.rec, logLik) { n.patches <- nrow(data) run(initial.popn(N, n.partitions, n.patches, logLik), ngens, s, n.patches, p.mutate, p.sex, p.rec, logLik) } mosaic/R/likelihood.R000755 000766 000024 00000002374 11332654176 014750 0ustar00lkmstaff000000 000000 make.mosaic.logLik <- function(d) { obj <- likelihood.cache(d) L <- obj$L p <- obj$p const <- obj$const n <- nrow(d) function(m) sum(L[cbind(c(1, m+1), c(m, n))]) + const } binFunc <- function(x, f, p) { g <- function(p) { m <- cbind(p^2 + f*p*(1-p), 2*p*(1-p)*(1-f), (1-p)^2 + f*p*(1-p)) m2 <- pmax(zapsmall(m), 0) # negative freqs not allowed sum(x*log(zapsmall(m2/rowSums(m2))+1e-100)) } if ( length(p) == 1 ) return(c(g(p), p)) p0 <- min(p) p1 <- max(p) if ( abs(p0 - p1) < 1e-8 ) {# Don't bother doing anything c(g(p0), p0) } else { ans <- optimize(g, c(p0, p1), maximum=TRUE) c(ans$objective, ans$maximum) } } likelihood.cache <- function(d) { N <- rowSums(d) freq.p <- d/N p <- freq.p[,1] + freq.p[,2]/2 f <- 1 - freq.p[,2]/(2 * p * (1 - p)) f[is.nan(f)] <- 1 n <- nrow(d) i <- rep(1:n, n:1) j <- sequence(n:1) + i - 1 ans <- sapply(mapply(seq, i, j), function(k) binFunc(d[k,], f[k], p[k])) ij <- cbind(i, j) ans.L <- ans.p <- matrix(NA, n, n) ans.L[ij] <- ans[1,] ans.p[ij] <- ans[2,] const <- sum(lfactorial(N) - rowSums(lfactorial(d)) + d[,2] * log(2*(1 - f + 1e-100))) list(L=ans.L, p=ans.p, const=const) } mosaic/R/plot.R000755 000766 000024 00000003575 11334367412 013603 0ustar00lkmstaff000000 000000 plotMosaic <- function(d, m, pch=20, lcol="blue", ...) { n.patches <- nrow(d) x <- seq(0, 1, length=n.patches) n <- rowSums(d[1:3]) p <- with(d, AA + Aa/2)/n plot(x, p, pch=pch, ...) usr <- par("usr") dx <- 1/(2*(n.patches - 1)) sx <- c(usr[1], rep(c(-dx, x[m] + dx, 1+dx), each=2), usr[2]) sy <- rep(step.heights(d, m), each=2) lines(sx, sy, col=lcol) } plot.mosaic <- function(x, nsteps=NULL, ...) { if ( is.null(nsteps) ) nsteps <- length(x) plotMosaic(attr(x, "data"), x[[nsteps]]$best.model, ...) } ## Cut the data at a series of points, and apply some function to each ## group. ## ## This is the workhorse function; given a vector of values 'x', and a ## vector of indices 'at', cut 'x', to produce a list: ## x[1..at[1]], x[(at[1]+1)..at[2]], ..., x[(at[n]+1)..length(x)] ## Return the result of some function 'f' applied to each of these ## sublists: ## f(x[1..at[1]]), ## f(x[(at[1]+1)..at[2]]), ## ..., ## f(x[(at[n]+1)..length(x)]) f.at <- function(x, at, f) { from <- c(0, at) + 1 to <- c(at, length(x)) lapply(seq_along(from), function(i) f(x[from[i]:to[i]])) } ## return the step heights of model m (this is a modification of your ## method and is where the time is spent...) step.heights <- function(d, m) { N <- rowSums(d) freq.p <- d/N p <- freq.p[,1] + freq.p[,2]/2 f <- 1 - freq.p[,2]/(2 * p * (1 - p)) f[is.nan(f)] <- 1 from <- c(0, m) + 1 to <- c(m, length(d[,1])) steps <- lapply(seq_along(from), function(i) from[i]:to[i]) heights <- sapply(steps, function(k) binFunc(d[k,], f[k], p[k]))[2,] c(0, heights, 1) } ## sum the steps of a dataset, returning a vector of mean step sums mosaicity <- function(d, m) { z <- step.heights(d, m) (sum(abs(diff(z))) - 1)/2 } ## nice little pfd plotting function pdf.f <- function(f, file, ...) { cat(sprintf("Writing %s\n", file)) pdf(file, ...) on.exit(dev.off()) f() } mosaic/man/fit.mosaic.Rd000644 000766 000024 00000004421 11334372260 015360 0ustar00lkmstaff000000 000000 \name{fit.mosaic} \alias{fit.mosaic} \title{Fit a Mosaic Model Using Maximum Likelihood} \description{This function fits a step-wise model to data from a one dimensional hybrid zone.} \usage{ fit.mosaic(data, sig.level, N=50, ngens=100, s=5, p.mutate=0.1, p.sex=1, p.rec=0.25, verbose=TRUE) } \arguments{ \item{data}{a data frame where columns represent genotype frequencies and rows correspond to patches in a one dimensional hybrid zone dataset. See \code{\link{hybrid.zone}} for a sample dataset and more information} \item{sig.level}{desired significance level for the number of steps fit by the model} \item{N}{the 'population size' used in the genetic algorithm (a larger population will typically result in a more accurate best fit model, but also will increase the calculation time)} \item{ngens}{number of generations to run the genetic algorithm for (more generations will typically result in a more accurate best fit model, but also will increase the calculation time)} \item{s}{strength of selection used in the genetic algorithm} \item{p.mutate}{mutation rate (e.g., probability of moving the location of a step in a model during reproduction in the genetic algorithm} \item{p.sex}{probability of sexually recombining two models to create permuted offspring models in the genetic algorithm} \item{p.rec}{a real number between 0 and 0.5 which denotes the probability that recombination occurs between two model step locations in a sexual event in the genetic algorithm} \item{verbose}{a logical value indicating the level of output as the algorithm runs} } % \details{ % I would put a more complete description of 'data' here. What are rows % and columns? % } \seealso{\code{\link{plotMosaic}} for plotting fitted models, \code{\link{mosaicity}} for summing step heights of models, \code{\link{hybrid.zone}} for a sample dataset, and \code{\link{make.mosaic.logLik}} for computing likelihoods.} \examples{ \dontrun{ data(hybrid.zone) res <- fit.mosaic(hybrid.zone, 0.05) plot(res) mosaicity(hybrid.zone, best.model) } } \references{ M'Gonigle, L.K., FitzJohn, R.G. 2010. Assortative Mating and Spatial Structure in Hybrid Zones. Evolution: 64: 444--445. } \keyword{model} \author{LKM & RGF}mosaic/man/hybrid.zone.Rd000644 000766 000024 00000001106 11334125731 015553 0ustar00lkmstaff000000 000000 \name{hybrid.zone} \alias{hybrid.zone} \docType{data} \title{Example (simulated) Hybrid Zone} \description{ A sample hybrid zone with 30 patches. } \usage{hybrid.zone} \format{a data frame where columns represent genotype frequencies and rows correspond to patches} \source{part of simulations used in M'Gonigle and FitzJohn 2010, simulated data.} \examples{ data(hybrid.zone) hybrid.zone } \references{ M'Gonigle, L.K., FitzJohn, R.G. 2010. Assortative Mating and Spatial Structure in Hybrid Zones. Evolution: 64: 444--445. } \keyword{datasets} \author{LKM & RGF} mosaic/man/make.mosaic.logLik.Rd000644 000766 000024 00000001060 11334366616 016737 0ustar00lkmstaff000000 000000 \name{make.mosaic.logLik} \alias{make.mosaic.logLik} \title{Compute likelihoods} \description{Returns a function which returns the likelihood of each possible step (partial likelihoods for all partitions are computed and cached to speed up future calculations).} \usage{ make.mosaic.logLik(d) } \arguments{ \item{d}{a hybrid zone dataset as described in \code{\link{hybrid.zone}}} } \references{ M'Gonigle, L.K., FitzJohn, R.G. 2010. Assortative Mating and Spatial Structure in Hybrid Zones. Evolution: 64: 444--445. } \author{LKM & RGF}mosaic/man/mosaicity.Rd000644 000766 000024 00000001562 11334372315 015331 0ustar00lkmstaff000000 000000 \name{mosaicity} \alias{mosaicity} \title{Sums the step heights of a best fit model.} \description{Returns the 'mosaicity' score of a model \code{m} corresponding to hybrid zone dataset \code{d}.} \usage{ mosaicity(d, m) } \arguments{ \item{d}{a hybrid zone dataset as defined in \code{\link{fit.mosaic}}} \item{m}{a model as defined in \code{\link{fit.mosaic}}} } \seealso{\code{\link{plotMosaic}} for plotting fitted models, \code{\link{hybrid.zone}} for a sample dataset, and \code{\link{make.mosaic.logLik}} for computing likelihoods.} \examples{ \dontrun{ data(hybrid.zone) res <- fit.mosaic(hybrid.zone, 0.05) best.model <- res[[length(res)]]$best.model mosaicity(hybrid.zone, best.model) } } \references{ M'Gonigle, L.K., FitzJohn, R.G. 2010. Assortative Mating and Spatial Structure in Hybrid Zones. Evolution: 64: 444--445. } \author{LKM & RGF}mosaic/man/plotMosaic.Rd000644 000766 000024 00000002404 11334367336 015445 0ustar00lkmstaff000000 000000 \name{plotMosaic} \alias{plotMosaic} \alias{plot.mosaic} \title{Plots a Mosaic Fit} \description{\code{plotMosaic} plots a hybrid dataset along with a specified model. \code{plot} can be applied directly to the output of \code{\link{fit.mosaic}} and by default plots the dataset and most likely best fit model. If \code{nsteps} is specified it plots a best fit model with \code{nsteps} steps} \usage{ plotMosaic(d, m, pch=20, lcol="blue", ...) \method{plot}{mosaic}(x, nsteps=NULL, ...) } \arguments{ \item{d}{a hybrid zone data set as defined in \code{\link{fit.mosaic}}} \item{m}{a model as defined in \code{\link{fit.mosaic}}} \item{pch}{plot style of hybrid zone data} \item{lcol}{line color of best fit model} \item{nsteps}{number of steps} \item{x}{object of class \code{mosaic}} \item{...}{additional arguments to \code{plotMosaic} (when using \code{plot}) and from \code{plotMosaic} to the underlying \code{plot} functions.} } \examples{ \dontrun{ data(hybrid.zone) res <- fit.mosaic(hybrid.zone, 0.05) plot(res) plotMosaic(hybrid.zone, res[[length(res)]]$best.model) } } \references{ M'Gonigle, L.K., FitzJohn, R.G. 2010. Assortative Mating and Spatial Structure in Hybrid Zones. Evolution: 64: 444--445. } \author{LKM & RGF}mosaic/data/hybrid.zone.csv000644 000766 000024 00000000354 11332660026 016142 0ustar00lkmstaff000000 000000 "AA" "Aa" "aa" 0 0 100 0 0 100 0 0 100 0 0 90 0 0 92 0 0 112 0 0 100 0 1 98 0 3 97 1 3 96 98 1 1 60 1 1 0 5 104 0 1 99 0 0 100 0 1 99 1 0 99 0 0 75 94 6 0 0 1 99 0 1 65 0 1 75 79 3 0 84 2 0 99 1 0 100 0 0 99 1 0 108 0 0 100 0 0 100 0 0 mosaic/data/hybrid.zone.tab000644 000766 000024 00000000353 11333127315 016114 0ustar00lkmstaff000000 000000 "AA" "Aa" "aa" 0 1 99 0 0 100 0 0 100 0 0 90 0 0 92 0 0 112 0 0 100 0 0 99 0 0 100 0 3 97 96 3 1 58 3 1 0 2 107 0 1 99 0 1 99 0 4 96 0 3 97 0 0 75 94 5 1 0 2 98 1 1 64 0 3 73 81 1 0 86 0 0 98 2 0 100 0 0 96 4 0 108 0 0 100 0 0 100 0 0