-
Notifications
You must be signed in to change notification settings - Fork 4
/
ORFmaker.py
executable file
·102 lines (90 loc) · 3.89 KB
/
ORFmaker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/python3
# Copyright 2011-2014 Francisco Pina Martins <[email protected]>
# This file is part of 4Pipe4.
# 4Pipe4 is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 4Pipe4 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with 4Pipe4. If not, see <http://www.gnu.org/licenses/>.
import re
def DealWithSNPS(List):
    '''Recalculate SNP positions relative to the start of each ORF.

    List maps ORF titles to sequence text. Each title must contain a SNP
    tag of the form "#<pos><bases>#<pos><bases>..._" and a coordinate pair
    of the form "[<start> - <end>]".

    For every ORF, the SNPs whose position falls inside the coordinate range
    (both ends included) are collected into a dict that maps the position
    counted from <start> (1-based) to the SNP bases. When <start> is not
    smaller than <end>, each base is complemented (A<->T, G<->C) and
    upper-cased; the order of the letters is left unchanged.

    Returns a new dict whose keys are the original titles followed by a
    space and the string form of that dict, and whose values are the
    unchanged sequences.

    Raises AttributeError if a title lacks the SNP tag or the coordinates,
    and ValueError if a position or coordinate is not an integer.
    '''
    # One-pass complement table used for ORFs on the reverse strand.
    complement = str.maketrans('AGTC', 'TCGA')
    majororfs = {}
    for title, sequence in List.items():
        # Text between the first '#' and the last '_' holds the SNP list.
        snps = re.search(r'#.*_', title).group(0)[1:-1].split('#')
        start = int(re.search(r'\[.* - ', title).group(0)[1:-3])
        end = int(re.search(r' - .*\]', title).group(0)[3:-1])
        forward = start < end
        low, high = min(start, end), max(start, end)
        ref = {}
        for snp in snps:
            # Leading digits are the position, trailing non-digits the bases.
            position = int(re.search(r'^\d*', snp).group(0))
            if not low <= position <= high:
                continue
            bases = re.search(r'\D*$', snp).group(0)
            if not forward:
                bases = bases.translate(complement).upper()
            # Distance from the ORF start, counted from 1 on either strand.
            ref[abs(position - start) + 1] = bases
        majororfs[title + ' ' + str(ref)] = sequence
    return majororfs
def BestORF(orffasta):
    '''Collect the ORFs whose coordinate range contains at least one SNP.

    orffasta is an open, readable FASTA handle. It is read completely and
    then closed, even if reading fails. Every header line (starting with
    '>') must contain a SNP tag of the form "#<pos><BASES>#..._" and a
    coordinate pair "[<start> - <end>]".

    A record is kept when any SNP position lies between the two coordinates
    (both ends included, in either order). Records without such a SNP are
    dropped. No comparison of ORF lengths is performed here. Lines that
    appear before the first header are ignored.

    Returns a dict mapping the header (stripped of '>', spaces and the
    newline) to the concatenated sequence lines, newlines included.
    '''
    try:
        records = orffasta.readlines()
    finally:
        orffasta.close()

    chunks = {}
    title = None
    # Starts False so that stray lines before the first header are skipped
    # instead of hitting an unbound flag.
    keep = False
    for line in records:
        if line.startswith('>'):
            title = line.strip('>\n ')
            tagged = re.search(r'#.*_', line).group(0)[:-1]
            # Drop the base letters; what remains is '#pos#pos...'.
            positions = [int(pos) for pos in
                         re.sub(r'[A-Z]', '', tagged)[1:].split('#')]
            start = int(re.search(r'\[.* - ', line).group(0)[1:-3])
            end = int(re.search(r' - .*\]', line).group(0)[3:-1])
            low, high = min(start, end), max(start, end)
            keep = any(low <= pos <= high for pos in positions)
            if keep:
                # A repeated title restarts its sequence.
                chunks[title] = []
        elif keep:
            chunks[title].append(line)
    return {name: ''.join(parts) for name, parts in chunks.items()}
def ORFwriter(Dict, orffasta_file):
    '''Write the selected ORFs to a "BestORF.fasta" file.

    Dict maps ORF titles to sequence text; orffasta_file is the path of the
    input ORF FASTA file and must contain a '.'.

    The output path is the input path up to and including its last '.',
    minus the final 8 characters of that prefix, followed by
    'BestORF.fasta'. The path is printed before writing.

    For each entry the SNP tag (from the first '#' through the last '_' and
    any digits right after it) is removed from the title, then '>' + title
    and the sequence are written, each followed by a newline.
    '''
    # NOTE(review): the 8 dropped characters are presumably an 'allORFs.'
    # style suffix of the input name -- confirm against the caller.
    outfile = (re.match(r'^.*\.', orffasta_file).group(0)[0:-8]
               + 'BestORF.fasta')
    print(outfile)
    # The context manager guarantees the data is flushed and the file closed.
    with open(outfile, 'w') as bestorf:
        for title, sequence in Dict.items():
            bestorf.write('>' + re.sub(r'#.*_\d*', '', title) + '\n')
            bestorf.write(sequence + '\n')
def RunModule(orffasta_file):
    '''Run the whole module on the ORF FASTA file at orffasta_file.

    The file is opened for reading and passed through BestORF, then
    DealWithSNPS, and the result is written out by ORFwriter.
    '''
    handle = open(orffasta_file, 'r')
    selected = DealWithSNPS(BestORF(handle))
    ORFwriter(selected, orffasta_file)
if __name__ == "__main__":
    import sys
    # Usage: python3 ORFmaker.py orffile.fasta
    if len(sys.argv) < 2:
        # Exit with a readable hint instead of an IndexError traceback.
        sys.exit("Usage: python3 ORFmaker.py orffile.fasta")
    RunModule(sys.argv[1])