I am using a PWM (Position Weight Matrix) file in MEME format, downloaded from the JASPAR database, as the input file. I use `random.choices` from Python's standard library to randomly generate the nucleotide motif sequence, using the probability values at each position as weights.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import random
import numpy as np
def generate_nucleotide(probability):
    """Draw one nucleotide from A/C/G/T using *probability* as weights.

    The weights are first adjusted so that they sum to 1: the difference
    between 1 and their sum is added to the largest weight (the first one,
    if several tie).  That difference is positive when the weights fall
    short of 1 and negative when they overshoot, so the largest weight is
    raised or lowered accordingly.

    The caller's sequence is not modified; the adjustment is made on a copy.

    Args:
        probability: Four numeric weights, in the order A, C, G, T.

    Returns:
        A one-element list holding the chosen letter, exactly as returned
        by ``random.choices``.

    Raises:
        ValueError: If *probability* is empty, or if ``random.choices``
            rejects the weights (for example, a length other than four).
    """
    # Work on a copy so the caller's list is left untouched.
    weights = list(probability)

    # > 0 when the weights are short of 1, < 0 when they exceed 1, and 0 when
    # they already sum to 1 -- adding it is correct in every case, so no
    # fragile float equality test against 1 is needed.
    diff = 1 - sum(weights)
    index = weights.index(max(weights))
    weights[index] = weights[index] + diff

    return random.choices(['A', 'C', 'G', 'T'], weights=weights)
# Read the motif file and keep the text after the first 'letter-' marker
# (up to the next marker, if any), split into lines.  The context manager
# makes sure the file handle is closed once the text has been read.
with open('tf_selected_pwm.txt', 'r') as handle:
    pwm = handle.read().split('letter-')[1].split('\n')
# Drop the empty strings produced by blank lines.
pwm = list(filter(None, pwm))

# pwm[0] is the remainder of the 'letter-' line and pwm[-1] is the last
# non-empty line of the section; both are skipped.
# NOTE(review): presumably pwm[-1] is the trailing URL line of a JASPAR MEME
# file -- confirm against the actual input.
# Each remaining row is parsed into weights rounded to two decimals, and one
# nucleotide is drawn per row; the draws are joined into the motif string.
motif = ''.join(
    generate_nucleotide([np.round(float(x), 2) for x in row.split()])[0]
    for row in pwm[1:-1]
)
print(motif)
After running this, you will get a motif sequence such as `TTGCCACCAGAGGGAGCTA`.