diana.py 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-"
  3. """
  4. DiaNA - 2020 - by psy (epsylon@riseup.net)
  5. You should have received a copy of the GNU General Public License along
  6. with DiaNA; if not, write to the Free Software Foundation, Inc., 51
  7. Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  8. """
  # Release metadata and project links.
  VERSION = 'v0.3_beta'
  RELEASE = '19032020'  # release date, written as DDMMYYYY
  SOURCE1 = 'https://code.03c8.net/epsylon/diana'
  SOURCE2 = 'https://github.com/epsylon/diana'
  CONTACT = 'epsylon@riseup.net - (https://03c8.net)'
  14. """
  15. DNA-equiv:
  16. A <-> T
  17. C <-> G
  18. """
  19. import re, os, glob, random, time, math
  # --- File locations (relative to the working directory) ---
  genomes_path = 'datasets/'  # genome datasets raw data
  genomes_list_path = genomes_path + "genome.list"  # genome list
  brain_path = "resources/BRAIN/brain.in"  # in/out brain-tmp file
  universal_primer_list_path = "resources/PATTERNS/UPL.list"  # UPL list
  dna_codons_list_path = "resources/PATTERNS/DNAcodon.list"  # DNA codon list

  # --- Shared mutable state used by the functions below ---
  genomes = dict()  # main sources dict: genome_name
  repeats = dict()  # repetitions 'tmp' dict: genome_name:(repets,pattern)
  seeds_checked = list()  # list used for random checked patterns
  known_patterns = list()  # list used for known patterns

  # --- Settings ---
  max_length = 50  # [MAX. LENGTH] for range [PATTERN]
  def convert_size(size):
      """Convert a byte count into a ``(value, unit)`` pair.

      The value is ``size`` scaled by the largest power of 1024 that fits and
      rounded to two decimals; the unit is the matching suffix ("B" ... "YB").

      A size of 0 used to come back as the plain string ``'0 B'`` while every
      other size came back as a 2-tuple, so callers that unpack the result
      failed on empty totals. Zero now yields ``(0, "B")`` like any other size.
      """
      size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
      if size == 0:
          return 0, size_name[0]
      exponent = int(math.floor(math.log(size, 1024)))
      # Clamp so that sizes below 1 or beyond "YB" still map to a valid unit.
      exponent = max(0, min(exponent, len(size_name) - 1))
      scaled = round(size / math.pow(1024, exponent), 2)
      return scaled, size_name[exponent]
  def search_pattern_with_human():
      """Prompt for one pattern, upper-case it and hand it to create_new_pattern()."""
      typed = input("[HUMAN] [SEARCH] Pattern (ex: attacg): ")
      pattern = typed.upper()
      separator = "\n" + "-" * 5 + "\n"
      print(separator)
      create_new_pattern(pattern)  # validates, searches and stores the pattern
  def try_pattern_against_all_genomes_by_genome(pattern):
      """Record, per genome, how many times `pattern` occurs.

      For every genome whose sequence contains `pattern`, stores
      ``repeats[genome_name] = (times, pattern)`` in the module-level dict.
      Genomes without the pattern are left untouched.
      """
      matching = ((name, seq) for name, seq in genomes.items() if pattern in seq)
      for name, seq in matching:
          occurrences = re.findall(pattern, seq)
          repeats[name] = (len(occurrences), pattern)
  def try_pattern_against_all_genomes_by_pattern(pattern, index):
      """Record every genome containing `pattern`, keyed by pattern index.

      For the n-th genome (n starting at 1) that contains `pattern`, stores
      ``repeats[(index, n)] = (pattern, genome_name, times)`` in the
      module-level dict.
      """
      hits = 0  # number of genomes in which the pattern was found so far
      for genome_name, sequence in genomes.items():
          if pattern not in sequence:
              continue
          hits += 1
          times = len(re.findall(pattern, sequence))
          repeats[index, hits] = (pattern, genome_name, times)
  def sanitize_dna_pattern(pattern):
      """Return True when every symbol of `pattern` is one of A, T, G, C or N.

      The check is case-sensitive and an empty pattern is considered valid.
      """
      allowed = ("A", "T", "G", "C", "N")
      return all(symbol in allowed for symbol in pattern)
  def teach_ai():
      """Ask for the training mode and start it.

      Creates an empty brain file first when none exists. "H" selects the
      human-driven mode; any other answer selects the automata mode.
      """
      mode = input("[TRAIN-AI] MODE -> (H)uman, (A)utomata: ").upper()
      brain_exists = os.path.isfile(brain_path)
      if not brain_exists:
          create_initial_seed_file()
      trainer = teach_ai_human_mode if mode == "H" else teach_ai_automata_mode
      trainer()
  def teach_ai_human_mode():
      """Human training mode: search/discard patterns interactively and grow the local database."""
      search_patterns_lesson_with_a_human()
  def search_patterns_lesson_with_a_human():
      """Keep asking the user for patterns until one of them is rejected.

      Each pattern is passed to search_pattern_on_lesson(); the loop goes on
      only while that call answers "K" (continue).
      """
      print("\n" + "-" * 30)
      print("\n[TRAIN-AI] [HUMAN] [STOP] this mode; just entering whatever invalid pattern (ex: 'exit' or 'q').\n")
      while True:
          pattern = input("[TRAIN-AI] [HUMAN] [LOOP] [SEARCH] Pattern (ex: attacg): ").upper()
          print("\n" + "-" * 5 + "\n")
          if search_pattern_on_lesson(pattern) != "K":
              break  # "Z" (or anything that is not "continue") ends the lesson
  def search_pattern_on_lesson(pattern):
      """Validate `pattern` and look it up in the local database.

      Returns "Z" (stop) after printing an error when the pattern is not valid
      DNA; otherwise returns whatever search_pattern_on_local_database() returns.
      """
      if sanitize_dna_pattern(pattern) != True:
          print("[ERROR] -> Invalid DNA pattern ... [EXITING!]\n")
          return "Z"  # stop
      return search_pattern_on_local_database(pattern)
  def search_pattern_on_local_database(pattern):
      """Learn `pattern` unless the brain file already knows it for every genome.

      The brain file is read as one string. If the quoted pattern is absent,
      create_new_pattern() is called once. If it is present, it is called once
      for every loaded genome whose name does not appear in the brain file.
      When nothing had to be created an "already learned" notice is printed.

      Always returns "K" (continue).
      """
      # Context manager: the handle is closed even if reading fails.
      with open(brain_path, 'r') as brain:
          memory = brain.read().replace('\n', ' ')
      patterns_known = 0
      if "'" + pattern + "'" not in memory:  # always create new patterns
          create_new_pattern(pattern)
          patterns_known += 1
      else:
          for genome_name in genomes:  # create patterns found for new genomes
              if genome_name not in memory:
                  create_new_pattern(pattern)
                  patterns_known += 1
      if patterns_known == 0:
          print("[TRAIN-AI] [AUTOMATA] [LOOP] [RESULTS] -ALREADY- [LEARNED!] ... -> [GOING FOR NEXT!]\n")
          print("-"*5 + "\n")
      return "K"  # continue
  def create_initial_seed_file():
      """Create the brain file empty (an existing file is truncated)."""
      # Context manager: the handle is released even if the write fails.
      with open(brain_path, 'w') as brain:
          brain.write("")
  def create_new_pattern(pattern): # append it to brain
      """Search `pattern` in all genomes and append the result to the brain file.

      Invalid DNA patterns only print an error. A pattern already present in
      `known_patterns` is ignored. Otherwise the pattern is remembered, the
      module-level `repeats` dict is refreshed, every entry of `repeats` is
      listed on stdout and, if there is at least one entry, the whole dict is
      appended to the brain file as one text line.

      NOTE(review): `repeats` is never cleared here, so entries left by earlier
      patterns are listed and written again -- confirm whether that is intended.
      """
      if sanitize_dna_pattern(pattern) != True:
          print("[ERROR] -> Invalid DNA pattern ... [EXITING!]\n")
          return
      if pattern in known_patterns:
          return
      known_patterns.append(pattern)
      try_pattern_against_all_genomes_by_genome(pattern) # generate repeats dict
      patterns_found = 0
      for k, v in repeats.items(): # list patterns found to output
          print (" *", k +":", "-> ",v,"")
          patterns_found += 1
      print("")
      if patterns_found == 0:
          print("[INFO] -> Not any found! ... [EXITING!]\n")
          return
      # Context manager: the handle is closed even if the write fails.
      with open(brain_path, 'a') as brain:
          brain.write(str(repeats)+os.linesep) # add dict as str
  def teach_ai_automata_mode():
      """Automata training mode: brute-force pattern ranges and grow the local database."""
      search_patterns_lesson_with_an_ai()
  def search_patterns_lesson_with_an_ai():
      """Ask for a length range and keep testing random patterns inside it.

      The range is validated with check_for_deep_searching_ranges(). While the
      generator does not report the end, every generated pattern is passed to
      search_pattern_on_lesson(); the loop also ends once as many patterns as
      the estimated number of permutations have been tried.

      Errors raised while processing one pattern are still ignored so the loop
      moves on, but only `Exception` subclasses are caught: the previous bare
      ``except`` also swallowed KeyboardInterrupt/SystemExit, which made the
      loop impossible to interrupt.
      """
      print("\n"+"-"*30)
      print("\n[TRAIN-AI] [AUTOMATA] [STOP] this mode; pressing 'CTRL+z'.\n")
      ranges = input("[TRAIN-AI] [AUTOMATA] [SEARCH] Set range (x<y) for pattern deep searching (ex: 2-8): ")
      print("")
      valid_range, ranged_permutations = check_for_deep_searching_ranges(ranges)
      if str(valid_range) != "OK!":
          print("-"*15+"\n")
          print("[TRAIN-AI] [AUTOMATA] [ERROR] -> [INVALID!] Deep Learning [RANGE] -> "+valid_range+" ... [EXITING!]\n")
          return
      print("-"*15)
      print("\n[TRAIN-AI] [AUTOMATA] [SEARCH] Number of [PERMUTATIONS] estimated: [ "+str(ranged_permutations)+" ]\n")
      print("-"*15+"\n")
      num_pat = 0
      ranged_ending = False
      while not ranged_ending:
          try:
              pattern, ranged_ending = generate_random_pattern(ranges, ranged_permutations) # generate random seed
              if not pattern:
                  continue
              num_pat += 1
              print("[TRAIN-AI] [AUTOMATA] [LOOP] [SEARCH] Generating [RANDOM!] ["+str(num_pat)+"/"+str(ranged_permutations)+"] pattern: [ " + str(pattern) + " ]\n")
              search_pattern_on_lesson(pattern)
              if num_pat == ranged_permutations:
                  print("[TRAIN-AI] [AUTOMATA] [RESULTS]: REVIEWED -> [ "+str(ranged_permutations)+" PERMUTATIONS ] ... -> [EXITING!]\n")
                  ranged_ending = True
          except Exception:
              # Best effort: a failing pattern must not end the whole lesson.
              continue
  def generate_random_pattern(ranges, ranged_permutations):
      """Generate one random, not yet tried pattern for a "low-high" range.

      Walks the lengths low..high; for each one a random seed of that length is
      drawn and, if it is not in `seeds_checked` yet, it is remembered and
      returned as a nucleotide string.

      Returns a ``(pattern, ranged_ending)`` pair:
        * ``(pattern, False)`` -- a fresh pattern was produced;
        * ``(None, False)``    -- every seed drawn this round was already tried
          (previously the function fell off the end and returned a bare None,
          which callers could not unpack);
        * ``(None, True)``     -- the range text is invalid, or the number of
          lengths visited equals `ranged_permutations`.

      NOTE(review): the stop test compares a count of lengths with a count of
      permutations, exactly as before -- confirm this is the intended condition.
      """
      nucleotides = "ACTG"  # seed digit -> nucleotide: 0=A, 1=C, 2=T, 3=G
      try:
          bounds = ranges.split("-")
          range_low = int(bounds[0])
          range_high = int(bounds[1])
      except (AttributeError, IndexError, TypeError, ValueError):
          print("[TRAIN-AI] [AUTOMATA] [ERROR] -> [INVALID!] Deep Learning [RANGE] ... [EXITING!]\n")
          return None, True
      ranged_length = 0
      for length in range(range_low, range_high + 1):
          ranged_length += 1
          if ranged_length == ranged_permutations: # all possible variables have been bruteforced/checked! -> exit
              return None, True
          seed = [random.randrange(0, 4) for _ in range(length)] # generate "random" seed
          if seed not in seeds_checked:
              seeds_checked.append(seed)
              return "".join(nucleotides[digit] for digit in seed), False
      return None, False
  def check_for_deep_searching_ranges(ranges):
      """Validate a "low-high" range string and estimate its permutations.

      Returns ``(valid_range, ranged_permutations)``. `valid_range` is "OK!"
      when both bounds are integers, low < high and low > 1; otherwise it is a
      short description of the problem. `ranged_permutations` comes from
      math_ranged_permutations() when both bounds are integers, else 0.

      The specific "should be an integer" messages used to be overwritten by a
      generic 'bad format' further down; each problem is now reported as soon
      as it is detected.
      """
      parts = str(ranges).split("-")
      if len(parts) < 2:
          return "'bad format'", 0
      try:
          range_low = int(parts[0])
      except (TypeError, ValueError):
          return "'low range' should be an integer", 0
      try:
          range_high = int(parts[1])
      except (TypeError, ValueError):
          return "'high range' should be an integer", 0
      if range_low >= range_high:
          valid_range = "'low range' should be < than 'high range'"
      elif range_low <= 1: # always range > 1
          valid_range = "'low range' should be > than 1"
      else:
          valid_range = "OK!"
      return valid_range, math_ranged_permutations(range_low, range_high)
  def math_ranged_permutations(range_low, range_high):
      """Return the sum of 4**length for every length in range_low..range_high (inclusive).

      An empty range (low > high) yields 0.
      """
      return sum(4 ** length for length in range(range_low, range_high + 1))
  def libre_ai():
      """Entry point of the LIBRE-AI mode: load the brain and report statistics.

      Creates an empty brain file when none exists, loads it through
      examine_stored_brain_memory() and, unless that returns an empty string,
      passes the result to libre_ai_show_statistics().
      """
      brain_missing = not os.path.isfile(brain_path)
      if brain_missing:
          create_initial_seed_file()
      memory = examine_stored_brain_memory()
      if memory == "":
          return
      libre_ai_show_statistics(memory)  # show statistics
  def libre_ai_show_statistics(memory):
      """Print the storage, collection and analysis statistics report.

      Covers: storage sizes (extract_storage_sizes), the most recently changed
      entry under `genomes_path`, the learned-pattern summary
      (extract_total_patterns_learned_from_local), the number of loaded
      sequences with the largest and shortest one, per-nucleotide totals, and
      finally the pattern research report (extract_pattern_most_present_local).

      An empty `genomes_path` directory no longer raises: the "latest update"
      lines are simply left out.
      """
      print("[LIBRE-AI] [REPORTING] [STATISTICS] ... -> [STARTING!]\n")
      print("-"*15 + "\n")
      total_genomes = len(genomes)
      sequence_lengths = {name: len(sequence) for name, sequence in genomes.items()}
      total_adenine = sum(sequence.count("A") for sequence in genomes.values())
      total_guanine = sum(sequence.count("G") for sequence in genomes.values())
      total_cytosine = sum(sequence.count("C") for sequence in genomes.values())
      total_thymine = sum(sequence.count("T") for sequence in genomes.values())
      total_any = sum(sequence.count("N") for sequence in genomes.values())
      num_total_nucleotids = total_adenine + total_guanine + total_cytosine + total_thymine + total_any
      nucleotid_more_present = max(total_adenine, total_guanine, total_cytosine, total_thymine, total_any)

      print("[LIBRE-AI] [REPORTING] -STORAGE- [STATISTICS]: \n")
      extract_storage_sizes()
      dataset_entries = glob.glob(genomes_path+"*")
      if dataset_entries:  # max() on an empty list would raise ValueError
          latest_collection_file = max(dataset_entries, key=os.path.getctime)
          latest_collection_date = time.ctime(os.path.getmtime(latest_collection_file))
          print(" * [LATEST UPDATE]: '"+str(latest_collection_date)+"'\n")
          print(" + File: '"+str(latest_collection_file)+"'\n")
      print("-"*5 + "\n")

      print("[LIBRE-AI] [REPORTING] -COLLECTION- [STATISTICS]: \n")
      extract_total_patterns_learned_from_local(memory)
      print("\n"+"-"*5 + "\n")

      print("[LIBRE-AI] [REPORTING] -ANALYSIS- [STATISTICS]: \n")
      print(" * Total [DNA SECUENCES]: [ "+str(total_genomes)+" ]\n")
      # Placeholders print as "[]" when nothing qualifies, as the report did before.
      largest_pattern_name, largest_pattern_size = [], []
      shortest_pattern_name, shortest_pattern_size = [], []
      if sequence_lengths:
          # max()/min() keep the first of several equal lengths.
          longest = max(sequence_lengths, key=sequence_lengths.get)
          if sequence_lengths[longest] > 0:  # empty sequences never counted as "largest"
              largest_pattern_name = longest
              largest_pattern_size = sequence_lengths[longest]
          shortest_pattern_name = min(sequence_lengths, key=sequence_lengths.get)
          shortest_pattern_size = sequence_lengths[shortest_pattern_name]
      print(" + [LARGEST] : "+str(largest_pattern_name)+ " [ "+str(largest_pattern_size)+" bp linear RNA ]")
      print(" + [SHORTEST]: "+str(shortest_pattern_name)+ " [ "+str(shortest_pattern_size)+" bp linear RNA ]\n")

      print(" * Total [NUCLEOTIDS]: [ "+str(num_total_nucleotids)+" ]\n")
      nucleotide_lines = (
          (" + [A] Adenine : ", total_adenine),
          (" + [G] Guanine : ", total_guanine),
          (" + [C] Cytosine : ", total_cytosine),
          (" + [T] Thymine : ", total_thymine),
      )
      for label, amount in nucleotide_lines:
          if nucleotid_more_present == amount:
              print(label+str(amount)+" <- [MAX]")
          else:
              print(label+str(amount))
      if total_any > 0:
          if nucleotid_more_present == total_any:
              print(" + [N] *ANY* : "+str(total_any)+" <- [MAX]\n")
          else:
              print(" + [N] *ANY* : "+str(total_any)+"\n")
      print("-"*5 + "\n")
      extract_pattern_most_present_local(memory)
  def convert_memory_to_dict(memory): # [index] = genome_name, pattern, num_rep
      """Parse brain lines into ``{index: (genome_name, pattern, repeats)}``.

      `memory` is an iterable of text lines, each holding records of the form
      ``'<genome>': (<times>, '<pattern>')``. Records are numbered from 1 in
      the order they are found; `repeats` is kept as text.

      The regex groups are used directly. The previous version converted each
      match tuple back to text and re-split it, which raised IndexError as soon
      as a captured name contained a quote character.
      """
      record_regex = re.compile("'(.+?)': (.+?), '(.+?)'") # compiled once, not once per line
      memory_dict = {}
      index = 0
      for line in memory:
          for genome_name, raw_repeats, raw_pattern in record_regex.findall(line):
              index += 1
              # second group looks like "(3": drop the opening parenthesis
              genome_repeats = raw_repeats[1:] if raw_repeats.startswith("(") else raw_repeats
              genome_pattern = raw_pattern.replace(" ", "").replace("'", "").replace(")", "")
              memory_dict[index] = genome_name, genome_pattern, genome_repeats
      return memory_dict
  def _print_pattern_with_codons(codons, pattern_name, line, suffix=""):
      """Print `line` once per codon-table entry naming `pattern_name`, or once bare if none does."""
      labels = []
      for entry in codons:
          fields = entry.split(":")
          if len(fields) > 1 and fields[0] == str(pattern_name):
              labels.append(fields[1].replace("\n", ""))
      if not labels:
          print(line + suffix)
          return
      for label in labels:
          print(line + " " + label + suffix)

  def extract_pattern_most_present_local(memory):
      """Print the -RESEARCHING- report: learned patterns, primer pairs and codons.

      Does nothing when no genome is loaded. Otherwise it
        * tries every learned pattern against all genomes and prints the most
          and least present ones (overall and per pattern length), tagging a
          pattern with its codon name when the codon table lists it;
        * reports universal primer pairs (extract_potential_primer_pairs);
        * reports codons (extract_potential_dna_codons).

      Fixes over the previous version: an unreadable codon table now yields an
      empty table instead of an unbound name; [LARGEST]/[SHORTEST] are the
      largest and smallest pattern lengths (they used to be chosen by
      comparing (pattern, count) tuples); files are closed via context managers.
      """
      memory_dict = convert_memory_to_dict(memory)
      if not genomes:
          return
      try:
          with open(dna_codons_list_path, 'r') as handle:
              codons = handle.readlines()
      except (OSError, ValueError):
          codons = []  # the report goes on without codon names
      print("[LIBRE-AI] [REPORTING] -RESEARCHING- [STATISTICS]: \n")
      total_genomes = len(genomes)
      if memory_dict:
          total_patterns = sum(1 for _ in memory) # counter used for known patterns
          (_, _, biggest_pattern_name, biggest_pattern_size,
           smaller_pattern_name, smaller_pattern_size, total_patterns_all_genomes,
           most_present_patterns_by_len_list,
           less_present_patterns_by_len_list) = extract_patterns_most_found_in_all_genomes(memory_dict)
          print(" * Trying -[ "+str(total_patterns)+" ]- [PATTERNS LEARNED!] against -[ "+str(total_genomes)+ " ]- [DNA SECUENCES]:")
          if total_patterns_all_genomes:
              print("\n + Total [PATTERNS FOUND!]: [ "+str(total_patterns_all_genomes)+" ]")
              _print_pattern_with_codons(
                  codons, biggest_pattern_name,
                  "\n - [MOST-PRESENT!]: [ "+str(biggest_pattern_size)+" ] time(s) -> [ "+str(biggest_pattern_name)+" ]",
                  "\n")
              for length, found in most_present_patterns_by_len_list.items():
                  _print_pattern_with_codons(
                      codons, found[0],
                      " * [length = "+str(length)+"] : [ "+str(found[1])+" ] time(s) -> [ "+str(found[0])+" ]")
              _print_pattern_with_codons(
                  codons, smaller_pattern_name,
                  "\n - [LESS-PRESENT!]: [ "+str(smaller_pattern_size)+" ] time(s) -> [ "+str(smaller_pattern_name)+" ]",
                  "\n")
              for length, found in less_present_patterns_by_len_list.items():
                  _print_pattern_with_codons(
                      codons, found[0],
                      " * [length = "+str(length)+"] : [ "+str(found[1])+" ] time(s) -> [ "+str(found[0])+" ]")
              if most_present_patterns_by_len_list:
                  # keys are pattern lengths
                  print("\n - [LARGEST] : [ "+str(max(most_present_patterns_by_len_list))+" ] bp linear RNA")
                  print(" - [SHORTEST]: [ "+str(min(most_present_patterns_by_len_list))+" ] bp linear RNA\n")
          else:
              print("\n + Total [PATTERNS FOUND!]: [ 0 ]\n")
      try:
          with open(universal_primer_list_path, 'r') as handle:
              UPL = handle.readlines()
      except (OSError, ValueError):
          UPL = []
      if UPL:
          try:
              extract_potential_primer_pairs(UPL, total_genomes, codons)
          except Exception:
              pass  # best effort, as before: a broken primer list must not stop the report
      if codons:
          extract_potential_dna_codons(codons, total_genomes)
  def extract_potential_primer_pairs(UPL, total_genomes, codons):
      """Count the universal primer pairs of `UPL` inside all loaded genomes and print a report.

      Each `UPL` line is read as ``<name>:<sequence>``; the sequence is
      upper-cased and counted in every upper-cased genome. `total_genomes` is
      only used for the header line and `codons` is not used.

      Fixes over the previous version: each primer now keeps its own count (the
      running grand total used to be stored for every primer, inflating both
      the per-primer figures and the final sum), and counting is done on the
      same upper-cased genome text the presence check uses. Lines without a
      ":" or with an empty sequence are skipped.
      """
      total_universal_primer_pairs = 0
      primer_pairs_found_list = {}
      for line in UPL:
          total_universal_primer_pairs += 1
          fields = line.split(":")
          if len(fields) < 2:
              continue
          pair_sec = fields[0]
          pair_name = fields[1].upper().replace("\n","")
          if not pair_name:
              continue
          pair_times = sum(sequence.upper().count(pair_name) for sequence in genomes.values())
          if pair_times:
              primer_pairs_found_list[pair_sec] = pair_name, pair_times
      print(" * Trying -[ "+str(total_universal_primer_pairs)+" ]- [UNIVERSAL PRIMER PAIRS!] against -[ "+str(total_genomes)+ " ]- [DNA SECUENCES]:")
      if primer_pairs_found_list:
          total_primer_pairs_found_list = sum(found[1] for found in primer_pairs_found_list.values())
          print("\n + Total [UNIVERSAL PRIMER PAIRS FOUND!]: [ "+str(total_primer_pairs_found_list)+" ]\n")
          for m, n in primer_pairs_found_list.items():
              print(" * "+str(m)+" -> [ "+str(n[0])+" ] : [ "+str(n[1])+" ] time(s)")
          print ("")
      else:
          print("\n + Total [UNIVERSAL PRIMER PAIRS FOUND!]: [ 0 ]\n")
  def extract_potential_dna_codons(codons, total_genomes):
      """Count the codons of `codons` inside all loaded genomes and print a per-name report.

      Each line is read as ``<sequence>:<name>``. Occurrences of the upper-cased
      sequence are counted in every upper-cased genome and summed per name
      (several sequences may share one name). `total_genomes` is only used for
      the header line.

      Fix over the previous version: the intermediate results were stored in a
      dict keyed by the hit count, so two codon/genome pairs with the same
      count overwrote each other and hits were lost. Counts are now accumulated
      directly per name. Lines without a ":" or with an empty sequence are
      skipped.
      """
      total_codons = 0
      codons_found_list_by_name = {}
      for line in codons:
          total_codons += 1
          fields = line.split(":")
          if len(fields) < 2:
              continue
          codon_sec = fields[0].upper().replace("\n","")
          codon_name = fields[1].replace("\n","")
          if not codon_sec:
              continue
          codon_times = sum(sequence.upper().count(codon_sec) for sequence in genomes.values())
          if codon_times:
              codons_found_list_by_name[codon_name] = codons_found_list_by_name.get(codon_name, 0) + codon_times
      print(" * Trying -[ "+str(total_codons)+" ]- [PATTERN CODONS!] against -[ "+str(total_genomes)+ " ]- [DNA SECUENCES]:")
      if codons_found_list_by_name:
          total_codons_by_codon = sum(codons_found_list_by_name.values())
          print("\n + Total [PATTERN CODONS FOUND!]: [ "+str(total_codons_by_codon)+" ]\n")
          most_present_codons_found = max(codons_found_list_by_name, key=codons_found_list_by_name.get)
          less_present_codons_found = min(codons_found_list_by_name, key=codons_found_list_by_name.get)
          print(" - [MOST-PRESENT!]: "+str(most_present_codons_found))
          print(" - [LESS-PRESENT!]: "+str(less_present_codons_found)+"\n")
          for p, f in codons_found_list_by_name.items():
              print(" * "+str(p)+" : "+str(f)+" time(s)")
          print ("")
      else:
          print("\n + Total [PATTERN CODONS FOUND!]: [ 0 ]\n")
  def extract_patterns_most_found_in_all_genomes(memory_dict):
      """Try every learned pattern against all genomes and summarise the hits.

      `memory_dict` maps an index to ``(genome_name, pattern, repeats)``. Each
      distinct pattern is searched with
      try_pattern_against_all_genomes_by_pattern(), which fills the
      module-level `repeats` dict; its ``(pattern, genome, times)`` entries are
      then summed per pattern.

      Returns a 9-tuple:
        (longest pattern, shortest pattern,
         most present pattern, its total, least present pattern, its total,
         grand total of hits,
         {length: (most present pattern, total)},
         {length: (least present pattern, total)})
      Names are None and totals 0 when nothing was found.

      Fixes over the previous version: the per-pattern total was a running sum
      that was never reset, so each pattern also included all earlier patterns'
      hits; the longest/shortest names were unbound when nothing was found;
      the shortest pattern was selected by comparing a hit count with a length.
      Entries of `repeats` that are not 3-tuples are ignored.
      """
      present_patterns = []
      for record in memory_dict.values():
          if record[1] not in present_patterns:
              present_patterns.append(record[1])
      for index, pattern in enumerate(present_patterns, start=1): # genome num index
          try_pattern_against_all_genomes_by_pattern(pattern, index)

      list_total_patterns_by_pattern = {}
      for entry in repeats.values():
          if len(entry) != 3:  # not a (pattern, genome, times) record
              continue
          pattern, _genome, times = entry
          list_total_patterns_by_pattern[pattern] = list_total_patterns_by_pattern.get(pattern, 0) + times

      total_patterns_all_genomes = sum(list_total_patterns_by_pattern.values())
      max_size_pattern_name = None
      less_size_pattern_name = None
      biggest_pattern_name = None
      biggest_pattern_size = 0
      smaller_pattern_name = None
      smaller_pattern_size = 0
      if list_total_patterns_by_pattern:
          totals = list_total_patterns_by_pattern
          # max()/min() keep the first of several equal candidates
          biggest_pattern_name = max(totals, key=totals.get)
          biggest_pattern_size = totals[biggest_pattern_name]
          smaller_pattern_name = min(totals, key=totals.get)
          smaller_pattern_size = totals[smaller_pattern_name]
          max_size_pattern_name = max(totals, key=len)
          less_size_pattern_name = min(totals, key=len)
      most_present_patterns_by_len_list = extract_most_present_pattern_by_len(list_total_patterns_by_pattern)
      less_present_patterns_by_len_list = extract_less_present_pattern_by_len(list_total_patterns_by_pattern)
      return max_size_pattern_name, less_size_pattern_name, biggest_pattern_name, biggest_pattern_size, smaller_pattern_name, smaller_pattern_size, total_patterns_all_genomes, most_present_patterns_by_len_list, less_present_patterns_by_len_list
  def extract_most_present_pattern_by_len(list_total_patterns_by_pattern):
      """For each pattern length, keep the pattern with the highest total.

      Takes ``{pattern: total}`` and returns ``{length: (pattern, total)}``.
      On equal totals the pattern seen first is kept.
      """
      best_by_length = {}
      for pattern, total in list_total_patterns_by_pattern.items():
          length = len(pattern)
          current = best_by_length.get(length)
          if current is None or total > current[1]:
              best_by_length[length] = (pattern, total)
      return best_by_length
  def extract_less_present_pattern_by_len(list_total_patterns_by_pattern):
      """For each pattern length, keep the pattern with the lowest total.

      Takes ``{pattern: total}`` and returns ``{length: (pattern, total)}``.
      On equal totals the pattern seen first is kept.
      """
      worst_by_length = {}
      for pattern, total in list_total_patterns_by_pattern.items():
          length = len(pattern)
          current = worst_by_length.get(length)
          if current is None or total < current[1]:
              worst_by_length[length] = (pattern, total)
      return worst_by_length
  def extract_storage_sizes():
      """Print the on-disk size of the datasets, the genome list and the brain file.

      Dataset size is the sum of the sizes of every ``*.genome`` file matched
      by ``genomes_path + '*/*/*'``. Missing files count as 0 bytes and parts
      whose size is 0 are not listed.

      Fixes over the previous version: the dataset total added up the length of
      each file *path* instead of the file size, and an overall total of 0
      crashed while unpacking the result of convert_size().
      """
      def size_on_disk(path):
          """Size of `path` in bytes, 0 when it cannot be read."""
          try:
              return os.path.getsize(path)
          except OSError:
              return 0

      def human_readable(size):
          """Format `size` as text, whether convert_size() returns text or a pair."""
          converted = convert_size(size)
          if isinstance(converted, str):
              return converted
          return '%s %s' % tuple(converted)

      total_dataset_size = 0
      for file in glob.iglob(genomes_path + '*/*/*', recursive=True): # extract datasets sizes
          if file.endswith(".genome"):
              total_dataset_size += size_on_disk(file)
      total_brain_size = size_on_disk(brain_path) # extract brain sizes
      total_list_size = size_on_disk(genomes_list_path) # extract genomes list sizes
      total_files_size = total_dataset_size + total_brain_size + total_list_size

      print(" * Total [FILE SIZES]: "+human_readable(total_files_size)+"\n")
      if total_dataset_size:
          print(" + [DATASET]: "+human_readable(total_dataset_size)+"\n")
      if total_list_size:
          print(" + [LIST]: "+human_readable(total_list_size)+"\n")
      if total_brain_size:
          print(" + [BRAIN]: "+human_readable(total_brain_size)+"\n")
  def extract_total_patterns_learned_from_local(memory):
      """Print the pattern settings and the number of learned entries.

      Counts the items of `memory`, prints the [MAX. LENGTH] setting and the
      count, calls generate_pattern_len_report_structure(memory) and returns
      `memory` unchanged.
      """
      total_patterns = sum(1 for _ in memory)
      print(" * [SETTINGS] Using [MAX. LENGTH] for range [PATTERN] = [ "+str(max_length)+" ]\n")
      learned_line = " + [PATTERNS LEARNED!]: [ "+str(total_patterns)+" ]"
      # a blank line follows the count only when something has been learned
      print((learned_line + "\n") if total_patterns > 0 else learned_line)
      generate_pattern_len_report_structure(memory)
      return memory
  def list_genomes_on_database():
      """Print a per-genome report and save the same report to `genomes_list_path`.

      For every loaded genome: its name, the nucleotide total (sequence length
      minus one), the A/G/C/T counts, the N count when there is any, and the
      number of occurrences of each codon-table sequence.

      Both files are handled with context managers so they are closed even if
      the report fails half-way; the codon table is parsed once and lines
      without a ":" are skipped.
      """
      print("[LIST] [REPORTING] [DNA SECUENCES] ... -> [STARTING!]\n")
      with open(dna_codons_list_path, 'r') as codons_file:
          codon_lines = codons_file.readlines()
      # (sequence, name) pairs of the codon table
      codon_table = []
      for line in codon_lines:
          fields = line.split(":")
          if len(fields) > 1:
              codon_table.append((str(fields[0]), str(fields[1].replace("\n",""))))
      nucleotide_labels = (
          (" - [A] Adenine :", "A"),
          (" - [G] Guanine :", "G"),
          (" - [C] Cytosine :", "C"),
          (" - [T] Thymine :", "T"),
      )
      print("-"*15 + "\n")
      with open(genomes_list_path, 'w') as report:
          for k, v in genomes.items():
              print ("* "+str(k))
              print ("\n + Total [NUCLEOTIDS]: [ "+str(len(v)-1)+" bp linear RNA ]\n")
              for label, symbol in nucleotide_labels:
                  print (label, str(v.count(symbol)))
              report.write("* "+str(k)+"\n")
              report.write("\n + Total [NUCLEOTIDS]: [ "+str(len(v)-1)+" bp linear RNA ]\n")
              for label, symbol in nucleotide_labels:
                  report.write(label + " " + str(v.count(symbol)) + "\n")
              any_count = v.count("N")
              if any_count > 0:
                  print (" + [N] *ANY* :", str(any_count))
                  report.write(" + [N] *ANY* : "+ str(any_count)+"\n")
              codon_counts = [(codon_sec, codon_name, v.count(codon_sec)) for codon_sec, codon_name in codon_table]
              total_codons = sum(counter for _sec, _name, counter in codon_counts)
              print ("\n + Total [PATTERN CODONS!]: [ "+str(total_codons)+" ] time(s)\n")
              report.write("\n + Total [PATTERN CODONS!]: [ "+str(total_codons)+" ] time(s)\n")
              for codon_sec, codon_name, counter in codon_counts:
                  print (" - ["+codon_sec+"] "+codon_name+" :", str(counter))
                  report.write(" - ["+codon_sec+"] "+codon_name+" : "+ str(counter)+"\n")
              print ("")
              report.write("\n")
      print("-"*15 + "\n")
      print ("[LIST] [INFO] [SAVED!] at: '"+str(genomes_list_path)+"'... -> [EXITING!]\n")
  665. def examine_stored_brain_memory():
  666. memory = [] # list used as hot-memory
  667. f=open(brain_path, 'r')
  668. for line in f.readlines():
  669. if line not in memory:
  670. memory.append(line)
  671. f.close()
  672. if memory == "": # first time run!
  673. print ("[LIBRE-AI] [INFO] Not any [BRAIN] present ... -> [BUILDING ONE!]\n")
  674. print("-"*15 + "\n")
  675. for i in range(2, 11+1):
  676. seed = [random.randrange(0, 4) for _ in range(i)] # generate "static" genesis seed
  677. if seed not in seeds_checked:
  678. seeds_checked.append(seed)
  679. pattern = ""
  680. for n in seed:
  681. if n == 0:
  682. pattern += "A"
  683. elif n == 1:
  684. pattern += "C"
  685. elif n == 2:
  686. pattern += "T"
  687. else:
  688. pattern += "G"
  689. print("[LIBRE-AI] [SEARCH] Generating [RANDOM] pattern: " + str(pattern) + "\n")
  690. create_new_pattern(pattern) # create new pattern
  691. print("-"*15 + "\n")
  692. print ("[LIBRE-AI] [INFO] A new [BRAIN] has been created !!! ... -> [ADVANCING!]\n")
  693. f=open(brain_path, 'r')
  694. memory = f.read().replace('\n',' ')
  695. f.close()
  696. return memory
  697. def generate_pattern_len_report_structure(memory):
  698. pattern_len_1 = 0 # related with [MAX. LENGTH] range
  699. pattern_len_2 = 0
  700. pattern_len_3 = 0
  701. pattern_len_4 = 0
  702. pattern_len_5 = 0
  703. pattern_len_6 = 0
  704. pattern_len_7 = 0
  705. pattern_len_8 = 0
  706. pattern_len_9 = 0
  707. pattern_len_10 = 0
  708. pattern_len_11 = 0
  709. pattern_len_12 = 0
  710. pattern_len_13 = 0
  711. pattern_len_14 = 0
  712. pattern_len_15 = 0
  713. pattern_len_16 = 0
  714. pattern_len_17 = 0
  715. pattern_len_18 = 0
  716. pattern_len_19 = 0
  717. pattern_len_20 = 0
  718. pattern_len_21 = 0
  719. pattern_len_22 = 0
  720. pattern_len_23 = 0
  721. pattern_len_24 = 0
  722. pattern_len_25 = 0
  723. pattern_len_26 = 0
  724. pattern_len_27 = 0
  725. pattern_len_28 = 0
  726. pattern_len_29 = 0
  727. pattern_len_30 = 0
  728. pattern_len_31 = 0
  729. pattern_len_32 = 0
  730. pattern_len_33 = 0
  731. pattern_len_34 = 0
  732. pattern_len_35 = 0
  733. pattern_len_36 = 0
  734. pattern_len_37 = 0
  735. pattern_len_38 = 0
  736. pattern_len_39 = 0
  737. pattern_len_40 = 0
  738. pattern_len_41 = 0
  739. pattern_len_42 = 0
  740. pattern_len_43 = 0
  741. pattern_len_44 = 0
  742. pattern_len_45 = 0
  743. pattern_len_46 = 0
  744. pattern_len_47 = 0
  745. pattern_len_48 = 0
  746. pattern_len_49 = 0
  747. pattern_len_50 = 0
  748. for m in memory:
  749. try:
  750. pattern_len = m.split(", '")[1]
  751. pattern_len = pattern_len.split("')")[0]
  752. pattern_len = len(pattern_len)
  753. except:
  754. pattern_len = 0 # discard!
  755. if pattern_len == 1:
  756. pattern_len_1 = pattern_len_1 + 1
  757. elif pattern_len == 2:
  758. pattern_len_2 = pattern_len_2 + 1
  759. elif pattern_len == 3:
  760. pattern_len_3 = pattern_len_3 + 1
  761. elif pattern_len == 4:
  762. pattern_len_4 = pattern_len_4 + 1
  763. elif pattern_len == 5:
  764. pattern_len_5 = pattern_len_5 + 1
  765. elif pattern_len == 6:
  766. pattern_len_6 = pattern_len_6 + 1
  767. elif pattern_len == 7:
  768. pattern_len_7 = pattern_len_7 + 1
  769. elif pattern_len == 8:
  770. pattern_len_8 = pattern_len_8 + 1
  771. elif pattern_len == 9:
  772. pattern_len_9 = pattern_len_9 + 1
  773. elif pattern_len == 10:
  774. pattern_len_10 = pattern_len_10 + 1
  775. elif pattern_len == 11:
  776. pattern_len_11 = pattern_len_11 + 1
  777. elif pattern_len == 12:
  778. pattern_len_12 = pattern_len_12 + 1
  779. elif pattern_len == 13:
  780. pattern_len_13 = pattern_len_13 + 1
  781. elif pattern_len == 14:
  782. pattern_len_14 = pattern_len_14 + 1
  783. elif pattern_len == 15:
  784. pattern_len_15 = pattern_len_15 + 1
  785. elif pattern_len == 16:
  786. pattern_len_16 = pattern_len_16 + 1
  787. elif pattern_len == 17:
  788. pattern_len_17 = pattern_len_17 + 1
  789. elif pattern_len == 18:
  790. pattern_len_18 = pattern_len_18 + 1
  791. elif pattern_len == 19:
  792. pattern_len_19 = pattern_len_19 + 1
  793. elif pattern_len == 20:
  794. pattern_len_20 = pattern_len_20 + 1
  795. elif pattern_len == 21:
  796. pattern_len_21 = pattern_len_21 + 1
  797. elif pattern_len == 22:
  798. pattern_len_22 = pattern_len_22 + 1
  799. elif pattern_len == 23:
  800. pattern_len_23 = pattern_len_23 + 1
  801. elif pattern_len == 24:
  802. pattern_len_24 = pattern_len_24 + 1
  803. elif pattern_len == 25:
  804. pattern_len_25 = pattern_len_25 + 1
  805. elif pattern_len == 26:
  806. pattern_len_26 = pattern_len_26 + 1
  807. elif pattern_len == 27:
  808. pattern_len_27 = pattern_len_27 + 1
  809. elif pattern_len == 28:
  810. pattern_len_28 = pattern_len_28 + 1
  811. elif pattern_len == 29:
  812. pattern_len_29 = pattern_len_29 + 1
  813. elif pattern_len == 30:
  814. pattern_len_30 = pattern_len_30 + 1
  815. elif pattern_len == 31:
  816. pattern_len_31 = pattern_len_31 + 1
  817. elif pattern_len == 32:
  818. pattern_len_32 = pattern_len_32 + 1
  819. elif pattern_len == 33:
  820. pattern_len_33 = pattern_len_33 + 1
  821. elif pattern_len == 34:
  822. pattern_len_34 = pattern_len_34 + 1
  823. elif pattern_len == 35:
  824. pattern_len_35 = pattern_len_35 + 1
  825. elif pattern_len == 36:
  826. pattern_len_36 = pattern_len_36 + 1
  827. elif pattern_len == 37:
  828. pattern_len_37 = pattern_len_37 + 1
  829. elif pattern_len == 38:
  830. pattern_len_38 = pattern_len_38 + 1
  831. elif pattern_len == 39:
  832. pattern_len_39 = pattern_len_39 + 1
  833. elif pattern_len == 40:
  834. pattern_len_40 = pattern_len_40 + 1
  835. elif pattern_len == 41:
  836. pattern_len_41 = pattern_len_41 + 1
  837. elif pattern_len == 42:
  838. pattern_len_42 = pattern_len_42 + 1
  839. elif pattern_len == 43:
  840. pattern_len_43 = pattern_len_43 + 1
  841. elif pattern_len == 44:
  842. pattern_len_44 = pattern_len_44 + 1
  843. elif pattern_len == 45:
  844. pattern_len_45 = pattern_len_45 + 1
  845. elif pattern_len == 46:
  846. pattern_len_46 = pattern_len_46 + 1
  847. elif pattern_len == 47:
  848. pattern_len_47 = pattern_len_47 + 1
  849. elif pattern_len == 48:
  850. pattern_len_48 = pattern_len_48 + 1
  851. elif pattern_len == 49:
  852. pattern_len_49 = pattern_len_49 + 1
  853. elif pattern_len == 50:
  854. pattern_len_50 = pattern_len_50 + 1
  855. else:
  856. pass
  857. if pattern_len_1 > 0:
  858. print(" - [length = 1] : [ "+str(pattern_len_1)+" ]")
  859. if pattern_len_2 > 0:
  860. print(" - [length = 2] : [ "+str(pattern_len_2)+" ]")
  861. if pattern_len_3 > 0:
  862. print(" - [length = 3] : [ "+str(pattern_len_3)+" ]")
  863. if pattern_len_4 > 0:
  864. print(" - [length = 4] : [ "+str(pattern_len_4)+" ]")
  865. if pattern_len_5 > 0:
  866. print(" - [length = 5] : [ "+str(pattern_len_5)+" ]")
  867. if pattern_len_6 > 0:
  868. print(" - [length = 6] : [ "+str(pattern_len_6)+" ]")
  869. if pattern_len_7 > 0:
  870. print(" - [length = 7] : [ "+str(pattern_len_7)+" ]")
  871. if pattern_len_8 > 0:
  872. print(" - [length = 8] : [ "+str(pattern_len_8)+" ]")
  873. if pattern_len_9 > 0:
  874. print(" - [length = 9] : [ "+str(pattern_len_9)+" ]")
  875. if pattern_len_10 > 0:
  876. print(" - [length = 10]: [ "+str(pattern_len_10)+" ]")
  877. if pattern_len_11 > 0:
  878. print(" - [length = 11]: [ "+str(pattern_len_11)+" ]")
  879. if pattern_len_12 > 0:
  880. print(" - [length = 12]: [ "+str(pattern_len_12)+" ]")
  881. if pattern_len_13 > 0:
  882. print(" - [length = 13]: [ "+str(pattern_len_13)+" ]")
  883. if pattern_len_14 > 0:
  884. print(" - [length = 14]: [ "+str(pattern_len_14)+" ]")
  885. if pattern_len_15 > 0:
  886. print(" - [length = 15]: [ "+str(pattern_len_15)+" ]")
  887. if pattern_len_16 > 0:
  888. print(" - [length = 16]: [ "+str(pattern_len_16)+" ]")
  889. if pattern_len_17 > 0:
  890. print(" - [length = 17]: [ "+str(pattern_len_17)+" ]")
  891. if pattern_len_18 > 0:
  892. print(" - [length = 18]: [ "+str(pattern_len_18)+" ]")
  893. if pattern_len_19 > 0:
  894. print(" - [length = 19]: [ "+str(pattern_len_19)+" ]")
  895. if pattern_len_20 > 0:
  896. print(" - [length = 20]: [ "+str(pattern_len_20)+" ]")
  897. if pattern_len_21 > 0:
  898. print(" - [length = 21]: [ "+str(pattern_len_21)+" ]")
  899. if pattern_len_22 > 0:
  900. print(" - [length = 22]: [ "+str(pattern_len_22)+" ]")
  901. if pattern_len_23 > 0:
  902. print(" - [length = 23]: [ "+str(pattern_len_23)+" ]")
  903. if pattern_len_24 > 0:
  904. print(" - [length = 24]: [ "+str(pattern_len_24)+" ]")
  905. if pattern_len_25 > 0:
  906. print(" - [length = 25]: [ "+str(pattern_len_25)+" ]")
  907. if pattern_len_26 > 0:
  908. print(" - [length = 26]: [ "+str(pattern_len_26)+" ]")
  909. if pattern_len_27 > 0:
  910. print(" - [length = 27]: [ "+str(pattern_len_27)+" ]")
  911. if pattern_len_28 > 0:
  912. print(" - [length = 28]: [ "+str(pattern_len_28)+" ]")
  913. if pattern_len_29 > 0:
  914. print(" - [length = 29]: [ "+str(pattern_len_29)+" ]")
  915. if pattern_len_30 > 0:
  916. print(" - [length = 30]: [ "+str(pattern_len_30)+" ]")
  917. if pattern_len_31 > 0:
  918. print(" - [length = 31]: [ "+str(pattern_len_31)+" ]")
  919. if pattern_len_32 > 0:
  920. print(" - [length = 32]: [ "+str(pattern_len_32)+" ]")
  921. if pattern_len_33 > 0:
  922. print(" - [length = 33]: [ "+str(pattern_len_33)+" ]")
  923. if pattern_len_34 > 0:
  924. print(" - [length = 34]: [ "+str(pattern_len_34)+" ]")
  925. if pattern_len_35 > 0:
  926. print(" - [length = 35]: [ "+str(pattern_len_35)+" ]")
  927. if pattern_len_36 > 0:
  928. print(" - [length = 36]: [ "+str(pattern_len_36)+" ]")
  929. if pattern_len_37 > 0:
  930. print(" - [length = 37]: [ "+str(pattern_len_37)+" ]")
  931. if pattern_len_38 > 0:
  932. print(" - [length = 38]: [ "+str(pattern_len_38)+" ]")
  933. if pattern_len_39 > 0:
  934. print(" - [length = 39]: [ "+str(pattern_len_39)+" ]")
  935. if pattern_len_40 > 0:
  936. print(" - [length = 40]: [ "+str(pattern_len_40)+" ]")
  937. if pattern_len_41 > 0:
  938. print(" - [length = 41]: [ "+str(pattern_len_41)+" ]")
  939. if pattern_len_42 > 0:
  940. print(" - [length = 42]: [ "+str(pattern_len_42)+" ]")
  941. if pattern_len_43 > 0:
  942. print(" - [length = 43]: [ "+str(pattern_len_43)+" ]")
  943. if pattern_len_44 > 0:
  944. print(" - [length = 44]: [ "+str(pattern_len_44)+" ]")
  945. if pattern_len_45 > 0:
  946. print(" - [length = 45]: [ "+str(pattern_len_45)+" ]")
  947. if pattern_len_46 > 0:
  948. print(" - [length = 46]: [ "+str(pattern_len_46)+" ]")
  949. if pattern_len_47 > 0:
  950. print(" - [length = 47]: [ "+str(pattern_len_47)+" ]")
  951. if pattern_len_48 > 0:
  952. print(" - [length = 48]: [ "+str(pattern_len_48)+" ]")
  953. if pattern_len_49 > 0:
  954. print(" - [length = 49]: [ "+str(pattern_len_49)+" ]")
  955. if pattern_len_50 > 0:
  956. print(" - [length = 50]: [ "+str(pattern_len_50)+" ]")
  957. def print_banner():
  958. print("\n"+"="*50)
  959. print(" ____ _ _ _ _ ")
  960. print("| _ \(_) __ _| \ | | / \ ")
  961. print("| | | | |/ _` | \| | / _ \ ")
  962. print("| |_| | | (_| | |\ |/ ___ \ ")
  963. print("|____/|_|\__,_|_| \_/_/ \_\ by psy")
  964. print('\n"Search and Recognize patterns in DNA sequences"')
  965. print("\n"+"="*50)
  966. print("+ GENOMES DETECTED:", str(num_files))
  967. print("="*50)
  968. print("\n"+"-"*15+"\n")
  969. print(" * VERSION: ")
  970. print(" + "+VERSION+" - (rev:"+RELEASE+")")
  971. print("\n * SOURCES:")
  972. print(" + "+SOURCE1)
  973. print(" + "+SOURCE2)
  974. print("\n * CONTACT: ")
  975. print(" + "+CONTACT+"\n")
  976. print("-"*15+"\n")
  977. print("="*50)
  978. # sub_init #
  979. num_files=0
  980. for file in glob.iglob(genomes_path + '**/*', recursive=True):
  981. if(file.endswith(".genome")):
  982. num_files = num_files + 1
  983. f=open(file, 'r')
  984. genome = f.read().replace('\n',' ')
  985. genomes[file.replace("datasets/","")] = genome.upper() # add genome to main dict
  986. f.close()
  987. print_banner() # show banner
  988. option = input("\n+ CHOOSE: (S)earch, (L)ist, (T)rain or (R)eport: ").upper()
  989. print("")
  990. print("="*50+"\n")
  991. if option == "S": # search pattern
  992. search_pattern_with_human()
  993. elif option == "L": # list genomes
  994. list_genomes_on_database()
  995. elif option == "T": # teach AI
  996. teach_ai()
  997. else: # libre AI
  998. libre_ai()
  999. print ("="*50+"\n")