-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGeneious_Annotations_to_BED_File_Converter.R
139 lines (83 loc) · 4.31 KB
/
Geneious_Annotations_to_BED_File_Converter.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
####################################################
# #
# Geneious Annotations.csv to BED formatted.tsv #
# file Converter #
# #
# Created by Kyle Drover #
# #
####################################################
#Loads the required packages (the install.packages lines below are commented out, so the packages must already be installed)
#packages = c("dplyr","tidyr","readr")
require("dplyr")
#install.packages("dplyr", repos = "http://cran.us.r-project.org")
library("dplyr")
#install.packages("tidyr", repos = "http://cran.us.r-project.org")
require("tidyr")
library("tidyr")
#install.packages("readr", repos = "http://cran.us.r-project.org")
require("readr")
library("readr")
# Converts annotation data exported from Geneious as a csv file into a BED
# formatted tsv file for the UCSC Genome Browser.
#
# Arguments:
#   file_path          path of the csv export, appended to `input_dir`
#   chromosome         value written in the first column of every data row
#   track_name         value of the name attribute on the track line
#   track_description  value of the description attribute on the track line
#   browser_position   position written on the browser line
#   input_dir          prefix pasted in front of `file_path` (must end with
#                      a path separator); defaults to the lab share
#   output_dir         folder (ending with a path separator) that receives
#                      the output files; defaults to the lab share
#
# Side effects:
#   * writes "Hg38-<today's date>.tsv" in `output_dir` (header + BED rows)
#   * overwrites "Complete_Annotations_ZIC2_Hg38.tsv" in `output_dir` with
#     the content of that dated file
#   * prints the path of the dated file
#
# Returns the logical result of appending the dated file to the complete
# annotations file.
Convert_to_BED <- function(file_path, chromosome, track_name, track_description,
                           browser_position,
                           input_dir = "//mhsdata.anu.edu.au/mhs/workgroups/jcsmr/ArkellLab/",
                           output_dir = "//mhsdata.anu.edu.au/mhs/workgroups/jcsmr/ArkellLab/Bio_Informatics/Files/")
{
  complete_path <- paste0(input_dir, file_path)
  if (!file.exists(complete_path)) {
    stop("Annotation file not found: ", complete_path, call. = FALSE)
  }

  # Read annotations from the specified csv file
  annotations <- read_csv(complete_path,
                          col_types = cols(End = col_integer(),
                                           Start = col_number()))

  # Make the column names usable as R identifiers: spaces become underscores
  # and parentheses are dropped (e.g. "Min (with gaps)" -> "Min_with_gaps").
  # Base gsub is used because the file never loads stringr.
  names(annotations) <- gsub("[()]", "",
                             gsub(" ", "_", names(annotations), fixed = TRUE))

  # Converts annotations into BED format
  # NOTE(review): both coordinates are shifted by +1 here, whereas BED is
  # usually described as 0-based, half-open - confirm this offset is intended.
  bed_format <- annotations %>%
    select(-Length, -Sequence_Name) %>%
    mutate(Start = Min_with_gaps + 1,
           End = Max_with_gaps + 1,
           Name_ordered = Name) %>%
    select(-Name, -Min_with_gaps, -Max_with_gaps)
  bed_df <- data.frame(chr_column = rep(chromosome, nrow(annotations)),
                       bed_format)

  # Header lines for the UCSC genome browser (track line, then browser line)
  # NOTE(review): UCSC documents the browser line as
  # "browser position <position>" without '=' or quotes - verify that the
  # browser accepts the form written here.
  parameters <- paste0('track name="', track_name, '" description="',
                       track_description, '" visibility=3 itemRgb="On"',
                       '\n', 'browser position= "', browser_position, '" ')

  name_of_bed_file <- paste0(output_dir, "Hg38", "-", format(Sys.Date()), ".tsv")
  print(name_of_bed_file)

  # Write the header, then append the BED rows directly to the same file.
  # Both calls open and close the file themselves, so no connection is left
  # open and no temporary Data.tsv file is needed.
  writeLines(parameters, name_of_bed_file)
  write.table(bed_df, name_of_bed_file, append = TRUE, sep = "\t",
              col.names = FALSE, row.names = FALSE, quote = FALSE)

  # Replace the complete annotations file with the content of the new file:
  # file.create truncates any previous version before the append.
  final_file <- paste0(output_dir, "Complete_Annotations_ZIC2_Hg38.tsv")
  file.create(final_file)
  file.append(final_file, name_of_bed_file)
}
# Arguments supplied after the script name on the command line.
user_parameters <- commandArgs(trailingOnly = TRUE)

# Rebuilds the logical arguments from the command-line words.
# The Windows batch file separates arguments by spaces, so every logical
# argument is terminated by a word containing a "*" tag. The words up to and
# including a tagged word are joined with single spaces, and everything from
# the first "*" onwards is removed. Trailing words that are not followed by a
# tagged word are ignored.
# Returns a character vector with one element per tagged argument.
parse_tagged_args <- function(words) {
  words <- as.character(words)
  group_ends <- which(grepl("*", words, fixed = TRUE))
  group_starts <- c(1L, head(group_ends, -1L) + 1L)
  vapply(seq_along(group_ends), function(i) {
    joined <- paste(words[seq(group_starts[i], group_ends[i])], collapse = " ")
    sub("[*].*$", "", joined)
  }, character(1))
}

pasted_arguments <- parse_tagged_args(user_parameters)

# Fail early with a clear message instead of passing missing values on to the
# converter.
if (length(pasted_arguments) < 5L) {
  stop("Expected 5 '*'-terminated arguments (file path, chromosome, ",
       "track name, track description, browser position) but found ",
       length(pasted_arguments), ".", call. = FALSE)
}

Convert_to_BED(file_path = pasted_arguments[1],
               chromosome = pasted_arguments[2],
               track_name = pasted_arguments[3],
               track_description = pasted_arguments[4],
               browser_position = pasted_arguments[5])