Skip to content

Commit

Permalink
wiki
Browse files Browse the repository at this point in the history
  • Loading branch information
zhanxw committed May 2, 2014
2 parents 487e82d + ed4e581 commit bbe3748
Show file tree
Hide file tree
Showing 13 changed files with 320 additions and 186 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@
# Compiled Static libraries
*.lai
*.la
paper
5 changes: 2 additions & 3 deletions AnnotationInputFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,6 @@ class AnnotationInputFile{
} while (this->line.empty());
}



switch (this->format){
case VCF:
stringTokenize(line, "\t ", &fd);
Expand All @@ -157,7 +155,8 @@ class AnnotationInputFile{
break;
case PLINK:
stringNaturalTokenize(line, "\t ", &fd);
if (fd.size() < 10) return false;
// will access fd[6], so at least 7 elements here
if (fd.size() <= 7) return false;
*chrom = fd[0];
*pos = toInt(fd[2]);
*ref = fd[3];
Expand Down
2 changes: 1 addition & 1 deletion Argument.h
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ class ParameterParser{
continue;
}

bool isLongParam = false; // I did not make use of this variable.
// bool isLongParam = false; // I did not make use of this variable.
unsigned int choppedLeadingDash = 0;
// user may input ---flag, ----flag, ..., and we will chop all leading -
// user may also input ---, ----, but I don't understand what that means, so report error
Expand Down
219 changes: 113 additions & 106 deletions FreqTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,120 +4,127 @@

template <class T>
class FreqTable{
public:
void add(const T& t) {
if (this->data.find(t) == this->data.end()) {
this->data[t] = 1;
} else {
this->data[t] ++;
}
this->isSorted = false;
};
void remove(const T& t) {
if (this->data.find(t) == this->data.end()) {
return false;
}
this->data[t] -- ;
this->isSorted = false;
};
size_t size() const{ return this->data.size();};
// return the frequency in ascending order
void at(const unsigned int idx, T* t, int* v) {
if (!this->isSorted)
this->sortByFrequency();
*v = this->orderedData[idx].first;
*t = *(this->orderedData[idx].second);
};
void clear() {
this->data.clear();
this->orderedData.clear();
this->isSorted = false;
};
void dump() {
for (size_t i = 0; i < this->orderedData.size(); ++i) {
std::cout << i << "\t"
<< orderedData[i].first << "\t"
<< *(orderedData[i].second) << "\n";
}
};

private:
void sortByFrequency() {
this->sortByKey();
std::stable_sort(this->orderedData.begin(), this->orderedData.end(), FreqTable::sortFirstInPair);
this->isSorted = true;
/* dump(); */
};
void sortByKey() {
this->orderedData.clear();
typename std::map<T, int >::iterator it;
for (it = this->data.begin();
it != this->data.end() ; it++) {
this->orderedData.push_back(std::make_pair( (*it).second, &((*it).first)) );
}
/* dump(); */
/* std::stable_sort(this->orderedData.begin(), this->orderedData.end(), FreqTable::sortSecondInPair); */
/* dump(); */
this->isSorted = true;
};
static bool sortFirstInPair( const std::pair<int, const T*>& t1,
const std::pair<int, const T*>& t2) {
return t1.first < t2.first;
};
static bool sortSecondInPair( const std::pair<int, const T*>& t1,
const std::pair<int, const T*>& t2) {
return *(t1.second) < *(t2.second);
};
std::map< T, int > data;
std::vector< std::pair<int, const T*> > orderedData;
bool isSorted;
};
public:
void add(const T& t) {
if (this->data.find(t) == this->data.end()) {
this->data[t] = 1;
} else {
this->data[t] ++;
}
this->isSorted = false;
}
/**
* if successfully minus one count, @return true; otherwise, @return false
*/
bool remove(const T& t) {
if (this->data.find(t) == this->data.end()) {
return false;
}
this->data[t] -- ;
if (this->data[t] == 0){
this->data.erase(this->data.find(t));
}
this->isSorted = false;
return true;
}
size_t size() const{ return this->data.size();};
// return the frequency in ascending order
void at(const unsigned int idx, T* t, int* v) {
if (!this->isSorted)
this->sortByFrequency();
*v = this->orderedData[idx].first;
*t = *(this->orderedData[idx].second);
}
void clear() {
this->data.clear();
this->orderedData.clear();
this->isSorted = false;
}
void dump() {
for (size_t i = 0; i < this->orderedData.size(); ++i) {
std::cout << i << "\t"
<< orderedData[i].first << "\t"
<< *(orderedData[i].second) << "\n";
}
}

private:
void sortByFrequency() {
this->sortByKey();
std::stable_sort(this->orderedData.begin(), this->orderedData.end(), FreqTable::sortFirstInPair);
this->isSorted = true;
/* dump(); */
}
void sortByKey() {
this->orderedData.clear();
typename std::map<T, int >::iterator it;
for (it = this->data.begin();
it != this->data.end() ; it++) {
this->orderedData.push_back(std::make_pair( (*it).second, &((*it).first)) );
}
/* dump(); */
/* std::stable_sort(this->orderedData.begin(), this->orderedData.end(), FreqTable::sortSecondInPair); */
/* dump(); */
this->isSorted = true;
}
static bool sortFirstInPair( const std::pair<int, const T*>& t1,
const std::pair<int, const T*>& t2) {
return t1.first < t2.first;
}
static bool sortSecondInPair( const std::pair<int, const T*>& t1,
const std::pair<int, const T*>& t2) {
return *(t1.second) < *(t2.second);
}
std::map< T, int > data;
std::vector< std::pair<int, const T*> > orderedData;
bool isSorted;
};

//////////////////////////////////////////////////////////////////////
// Test case 1 //
/*
FreqTable<std::string> codonFreq;
codonFreq.add("A");
codonFreq.add("B");
codonFreq.add("C");
codonFreq.add("D");
codonFreq.add("A");
codonFreq.add("B");
codonFreq.add("C");
codonFreq.add("A");
codonFreq.add("B");
codonFreq.add("A");
std::string s;
int f;
for (unsigned int i = 0 ; i < codonFreq.size(); i++) {
codonFreq.at(i, &s, &f);
printf("freq of %s is %d\n", s.c_str(), f);
};
return 0;
FreqTable<std::string> codonFreq;
codonFreq.add("A");
codonFreq.add("B");
codonFreq.add("C");
codonFreq.add("D");
codonFreq.add("A");
codonFreq.add("B");
codonFreq.add("C");
codonFreq.add("A");
codonFreq.add("B");
codonFreq.add("A");
std::string s;
int f;
for (unsigned int i = 0 ; i < codonFreq.size(); i++) {
codonFreq.at(i, &s, &f);
printf("freq of %s is %d\n", s.c_str(), f);
};
return 0;
*/

// Test case 2 //
/*
FreqTable<char> codonFreq;
codonFreq.add('A');
codonFreq.add('B');
codonFreq.add('C');
codonFreq.add('D');
codonFreq.add('A');
codonFreq.add('B');
codonFreq.add('C');
codonFreq.add('A');
codonFreq.add('B');
codonFreq.add('A');
char s;
int f;
for (unsigned int i = 0 ; i < codonFreq.size(); i++) {
codonFreq.at(i, &s, &f);
printf("freq of %c is %d\n", s, f);
};
return 0;
FreqTable<char> codonFreq;
codonFreq.add('A');
codonFreq.add('B');
codonFreq.add('C');
codonFreq.add('D');
codonFreq.add('A');
codonFreq.add('B');
codonFreq.add('C');
codonFreq.add('A');
codonFreq.add('B');
codonFreq.add('A');
char s;
int f;
for (unsigned int i = 0 ; i < codonFreq.size(); i++) {
codonFreq.at(i, &s, &f);
printf("freq of %c is %d\n", s, f);
};
return 0;
*/

Expand Down
4 changes: 2 additions & 2 deletions Gene.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,10 +341,10 @@ class Gene{
// we define essential splice site is (GU...AG) in the intron, and next to exon
// and GU, AG both have length 2.
for (unsigned int i = 0; i < exonNumber - 1; i++ ) {
if (this->isInRange(variantPos, this->exon[i].end+1, this->exon[i].end+1+2)) {
if (this->isInRange(variantPos, this->exon[i].end + 1, this->exon[i].end + 2)) {
*isEssentialSpliceSite = true;
return true;
} else if (this->isInRange(variantPos, this->exon[i+1].start - 1 - 2, this->exon[i+1].start - 1)) {
} else if (this->isInRange(variantPos, this->exon[i+1].start - 2, this->exon[i+1].start - 1)) {
*isEssentialSpliceSite = true;
return true;
}
Expand Down
54 changes: 50 additions & 4 deletions GeneAnnotation.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ class GeneAnnotation{
// output frequency files
std::string fn = outputFileName;


// output annotation frequency (all types of annotation)
std::string ofs = fn+".anno.frq";
this->printAnnotationFrequency(ofs.c_str());
Expand All @@ -105,6 +104,9 @@ class GeneAnnotation{
fprintf(stderr, "DONE: Generated frequency of each highest priority annotation type in [ %s ].\n", ofs.c_str());
LOG << "Generate frequency of high priority for highest priority annotation type in " << ofs << " succeed!\n";

// output Ts/Tv ratio
this->printTsTvRatio();

// output base change frequency
ofs = fn+".base.frq";
this->printBaseChangeFrequency(ofs.c_str());
Expand Down Expand Up @@ -262,7 +264,7 @@ class GeneAnnotation{
void printBaseChangeFrequency(const char* fileName){
FILE* fp = fopen(fileName, "wt");
assert(fp);
unsigned int n = this->baseFreq.size();
const unsigned int n = this->baseFreq.size();
for (unsigned int i = 0; i < n; i++){
std::string k;
int freq;
Expand All @@ -271,10 +273,11 @@ class GeneAnnotation{
}
fclose(fp);
};

void printCodonChangeFrequency(const char* fileName){
FILE* fp = fopen(fileName, "wt");
assert(fp);
unsigned int n = this->codonFreq.size();
const unsigned int n = this->codonFreq.size();
for (unsigned int i = 0; i < n; i++){
std::string k;
int freq;
Expand All @@ -286,7 +289,7 @@ class GeneAnnotation{
void printIndelLengthFrequency(const char* fileName){
FILE* fp = fopen(fileName, "wt");
assert(fp);
unsigned int n = this->indelLengthFreq.size();
const unsigned int n = this->indelLengthFreq.size();
for (unsigned int i = 0; i < n; i++){
int key;
int freq;
Expand All @@ -295,6 +298,49 @@ class GeneAnnotation{
}
fclose(fp);
};
void printTsTvRatio(){
const unsigned int n = this->baseFreq.size();
unsigned int ts = 0;
unsigned int tv = 0;
for (unsigned int i = 0; i < n; i++){
std::string k;
int freq;
this->baseFreq.at(i, &k, &freq);

// summarize ts/tv
if (k.size() == 4 || // only process A->T type of mutation
k[1] == '-' ||
k[2] == '>') {
char c1 = k[0];
char c2 = k[3];
// flip G to A and T to C
// then compare if flipped alleles are the same.
if (c1 == 'G') c1 = 'A';
if (c1 == 'T') c1 = 'C';
if (c2 == 'G') c2 = 'A';
if (c2 == 'T') c2 = 'C';
if (c1 != 'A' && c1 != 'C') continue;
if (c2 != 'A' && c2 != 'C') continue;
if (c1 == c2) {
ts += freq;
} else {
tv += freq;
}
}
}
// output ts/tv statistics
if (tv != 0) {
double tstv = 1.0 * ts / tv;
LOG << "Ts/Tv ratio: " << tstv << "\n";
fprintf(stderr, "Ts/Tv ratio: %g\n", tstv);
} else {
LOG << "Ts/Tv ratio: NA\n";
fprintf(stderr, "Ts/Tv ratio: NA\n");
}
LOG << "Ts observed: " << ts << " times; Tv observed: " << tv << " times.\n";
fprintf(stderr, "Ts observed: %d times; Tv observed: %d times.\n", ts, tv);
};

private:
// make sure genes are ordered
void sortGene() {
Expand Down
2 changes: 1 addition & 1 deletion GitVersion.h
Original file line number Diff line number Diff line change
@@ -1 +1 @@
const char *gitVersion = "71474f5c1f48a7986b0447ee7c10e6f800044220";
const char *gitVersion = "4589689c7fbbf98e655ba889592cdf3c64f587c2";
Loading

0 comments on commit bbe3748

Please sign in to comment.