merging updates

chrisamiller · Nov 30, 2016 · 9f06aec · 9f06aec
1 parent 07b90b6
commit 9f06aec
Show file tree

Hide file tree

Showing 8 changed files with 26 additions and 34 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,11 +1,11 @@
 Package: copyCat
 Title: Find regions of genomic copy number loss and gain from short reads
-Version: 1.6.10
+Version: 1.6.11
 Author: Chris Miller
 Description: Uses the depth of short reads in windows across the genome to 
 	     identify regions of genomic copy-number gain and loss
 Maintainer: Chris Miller <chrisamiller@gmail.com>
 LazyLoad: yes
 License: Apache License 2.0 | file LICENSE
 Depends: methods, foreach, doMC, IRanges, DNAcopy, stringr
-Packaged: 2016-09-30 09:34:14 AM; cmiller
+Packaged: 2016-11-30 12:45:10 PM; cmiller
diff --git a/R/cnSegments.R b/R/cnSegments.R
@@ -237,7 +237,7 @@ trimSegmentEnds <- function(segs,rdo){
 ##-----------------------------------------------
 ## remove segments that overlap at least n% with a reference assembly gap
 ##
-removeGapSpanningSegments <- function(segs,rdo,maxOverlap=0.75,gapExpansion=1){
+removeGapSpanningSegments <- function(segs,rdo,maxOverlap=0.75){
   count = length(segs[,1]);
 
   if(!(file.exists(paste(rdo@params$annotationDirectory,"/gaps.bed",sep="")))){
@@ -246,19 +246,8 @@ removeGapSpanningSegments <- function(segs,rdo,maxOverlap=0.75,gapExpansion=1){
     return(segs)
   }
 
-  gaps = read.table(paste(rdo@params$annotationDirectory,"/gaps.bed",sep=""),stringsAsFactors=F)
-
-  #expand the gaps to catch adjacent reads - especially useful for filtering near centromeres in single-sample mode
-  for(i in 1:length(gaps[,1])){
-    span = gaps[i,3]-gaps[i,2]
-    pad = ((span*gapExpansion)-span)/2
-    gaps[i,2]=gaps[i,2]-pad
-    gaps[i,3]=gaps[i,3]+pad
-    if(gaps[i,2]<1){
-      gaps[i,2]=1
-    }
-  }
-
+  gaps = read.table(paste(rdo@params$annotationDirectory,"/gaps.bed",sep=""))
+
   #intersect each chromosome separately
   newsegs = foreach(chr=names(rdo@chrs), .combine="rbind") %do%{
 
@@ -321,6 +310,8 @@ removeCoverageArtifacts <- function(segs,rdo){
 
   ##get a dataframe with all the counts
   df = makeDf(rdo@chrs,rdo@binParams)
+  df$chr=as.character(df$chr)
+
   for(i in 1:length(segs[,1])){
     med = getMedianDepth(df, segs[i,1], segs[i,2], segs[i,3])
     if((med < rdo@binParams$med/5) ||

diff --git a/R/getReadDepth.R b/R/getReadDepth.R
@@ -30,11 +30,11 @@ getWindowBins <- function(rdo){
   }
 
   ##remove any columns with no data - all zeros
-  for(i in names(winds)){
-    if(sum(as.numeric(winds[[i]]))==0){
-      winds[[i]] <- NULL
-    }
-  }
+  ## for(i in names(winds)){
+  ##   if(sum(as.numeric(winds[[i]]))==0){
+  ##     winds[[i]] <- NULL
+  ##   }
+  ## }
 
   ##sanity check
   if(length(winds) < 3){

diff --git a/R/meta.R b/R/meta.R
@@ -151,7 +151,12 @@ runPairedSampleAnalysis <- function(annotationDirectory, outputDirectory, normal
   writeSegs(segs,rdo,"segs.paired.dat")
   writeSegs(alts,rdo,"alts.paired.dat")
 
+  #save image
+  if(!is.null(rDataFile)){
+    save.image(paste(outputDirectory,rDataFile,sep="/"))
+  }
 
+
   if(outputSingleSample){
     ##segment the non-paired data using CBS
     segs = cnSegments(rdo2)
@@ -185,9 +190,5 @@ runPairedSampleAnalysis <- function(annotationDirectory, outputDirectory, normal
 
   dumpParams(rdo)
   dumpParams(rdo2)
-
-  if(!is.null(rDataFile)){
-    save.image(paste(outputDirectory,rDataFile,sep=""))
-  }
 
 }
diff --git a/R/zzz.R b/R/zzz.R
@@ -19,5 +19,5 @@
   sillyname(methods)
 
   initRdClass()
-  packageStartupMessage("Using copyCat version 1.6.10")
+  packageStartupMessage("Using copyCat version 1.6.11")
 }
diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-The copyCat package for R can detect somatic copy number aberrations by measuring the depth of coverage obtained by massively parallel sequencing of the genome. It achieves higher accuracy than many other packages, and runs faster by utilizing multi-core architectures to parallelize the processing of these large data sets.
+The copyCat package for R can detect somatic copy number aberrations by measuring the depth of coverage obtained by massively parallel sequencing of the genome. It achieves higher accuracy than many other packages, and runs faster by utilizing multi-core architectures to parallelize the processing of these large data sets.
 
 copyCat takes in paired samples (tumor and normal) and can utilize mutation frequency information from samtools to help correct for purity and ploidy. This package also includes a method for effectively increasing the resolution obtained from low-coverage experiments by utilizing breakpoint information from paired end sequencing to do positional refinement.  Its primary input comes from running bam-window (https://github.com/genome-vendor/bam-window) on the tumor and normal bam files. 
 

diff --git a/debian/changelog b/debian/changelog
@@ -1,3 +1,9 @@
+r-cran-copycat (1.6-11) lucid; urgency=low
+
+  * factor mismatch issue resolved
+
+ -- Chris Miller <cmiller@genome.wustl.edu>  Wed, 30 Nov 2016 09:37:00 -0500
+
 r-cran-copycat (1.6-9) lucid; urgency=low
 
   * per-readlength=false handled correctly, pass verbose arg to samtools functions

diff --git a/man/removeGapSpanningSegments.Rd b/man/removeGapSpanningSegments.Rd
@@ -10,7 +10,7 @@
   false-positive calls.
 }
 \usage{
-  removeGapSpanningSegments(segs,rdo,maxOverlap=0.75,gapExpansion=1)
+  removeGapSpanningSegments(segs,rdo,maxOverlap=0.75)
 }
 
 \arguments{
@@ -26,12 +26,6 @@
   \item{maxOverlap}{
     if a segment overlaps at least this much with a gap, it will be removed.
   }
-  \item{gapExpansion}{
-    fractional value that controls how far to expand gaps beyond the
-    input gap locations.  Example: a value of 1.2 would make the gap 20%
-    larger. This is useful in single-sample analysis when
-    centromere-adjacent regions tend to inappropriately pile up reads
-  }
 }
 \value{
   returns a dataframe with 5 columns: