import random import math #gcplus model gcplus = [[]] gcplus[0] = [0.18, 0.274, 0.426, 0.12] gcplus.append([0.172, 0.368, 0.274, 0.188]) gcplus.append([0.161, 0.339, 0.375, 0.125]) gcplus.append([0.079, 0.355, 0.384, 0.182]) # a different model gcminus = [[]] gcminus[0] = [0.1, 0.1, 0.1, 0.7] gcminus.append([0.1, 0.1, 0.7, 0.1]) gcminus.append([0.1, 0.7, 0.1, 0.1]) gcminus.append([0.7, 0.1, 0.1, 0.1]) # map symbols to their columns dictionary = {'a':0, 'c':1, 'g':2, 't':3} #generate a string of length L using a model def generate(L): current = 'a' # current state s = '' # generated string for i in range(0,L): summ = 0 r = random.random() # by default 0-1 for c in range(0,4): # for each column in the model summ = summ + gcminus[dictionary[current]][c] # sum the probabilities if(r < summ): # if the random value is less then the sum use that column if(c == 0): # based on the column pick the state/symbol current = 'a' if(c == 1): current = 'c' if(c == 2): current = 'g' if(c == 3): current = 't' print(current, end="") break # calculate the log-odds ratio of two models for a given string def compare(x): s = 0 for i in range(1,len(x)): aplus = gcplus[dictionary[x[i-1]]][dictionary[x[i]]] aminus = gcminus[dictionary[x[i-1]]][dictionary[x[i]]] s = s + math.log(aplus/aminus,2) print(s)