diana.py 47 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-"
  3. """
  4. DiaNA - 2020 - by psy (epsylon@riseup.net)
  5. You should have received a copy of the GNU General Public License along
  6. with DiaNA; if not, write to the Free Software Foundation, Inc., 51
  7. Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  8. """
  9. VERSION = "v0.1_beta"
  10. RELEASE = "16032020"
  11. SOURCE1 = "https://code.03c8.net/epsylon/diana"
  12. SOURCE2 = "https://github.com/epsylon/diana"
  13. CONTACT = "epsylon@riseup.net - (https://03c8.net)"
  14. """
  15. DNA-equiv:
  16. A <-> T
  17. C <-> G
  18. """
  19. import re, os, glob, random, time, math
  20. brain_path = "datasets/brain.in" # in/out brain-tmp file
  21. genomes_path = 'datasets/' # genome datasets raw data
  22. genomes_list_path = "datasets/genome.list" # genome list
  23. dna_letters = ["A", "T", "G", "C", "N"] # dna alphabet [n for ANY nucl.]
  24. genomes = {} # main sources dict: genome_name
  25. seeds_checked = [] # list used for random checked patterns
  26. repeats = {} # repetitions 'tmp' dict: genome_name:(repets,pattern)
  27. known_patterns = [] # list used for known patterns
  28. estimated_max_range_for_library_completed = 50 # [MAX. LENGTH] for range [PATTERN]
  29. estimated_patterns_for_library_completed = 1466015503700 # x = y+4^z
  30. estimated_quantity_per_pattern_for_library_completed = int(estimated_patterns_for_library_completed / estimated_max_range_for_library_completed)
  31. def convert_size(size):
  32. if (size == 0):
  33. return '0 B'
  34. size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
  35. i = int(math.floor(math.log(size,1024)))
  36. p = math.pow(1024,i)
  37. s = round(size/p,2)
  38. return s, size_name[i]
  39. def search_pattern_with_human():
  40. pattern = input("[HUMAN] [SEARCH] Pattern (ex: attacg): ").upper()
  41. print("\n"+"-"*5 + "\n")
  42. create_new_pattern(pattern) # create new pattern
  43. def try_pattern_against_all_genomes_by_genome(pattern):
  44. for k, v in genomes.items():
  45. if pattern in v:
  46. t = len(re.findall(pattern, v))
  47. repeats[k] = t, pattern # create dict: genome = times, pattern
  48. def try_pattern_against_all_genomes_by_pattern(pattern, index):
  49. p_index = 0 # pattern index
  50. for k, v in genomes.items():
  51. if pattern in v:
  52. p_index = p_index + 1
  53. t = len(re.findall(pattern, v))
  54. repeats[index,p_index] = pattern, k, t # create dict: index, p_index = pattern, genome, times
  55. def sanitize_dna_pattern(pattern):
  56. valid_pattern = True
  57. for c in pattern:
  58. if c == "A":
  59. pass
  60. elif c == "T":
  61. pass
  62. elif c == "G":
  63. pass
  64. elif c == "C":
  65. pass
  66. elif c == "N":
  67. pass
  68. else:
  69. valid_pattern = False
  70. return valid_pattern
  71. def teach_ai():
  72. mode = input("[TRAIN-AI] MODE -> (H)uman, (A)utomata: ").upper()
  73. if not os.path.isfile(brain_path):
  74. create_initial_seed_file()
  75. if mode == "H": # human mode
  76. teach_ai_human_mode()
  77. else: # libre AI
  78. teach_ai_automata_mode() # automata mode
  79. def teach_ai_human_mode(): # search/discard patterns with human interaction & generate local database
  80. search_patterns_lesson_with_a_human()
  81. def search_patterns_lesson_with_a_human():
  82. print("\n"+"-"*30)
  83. print("\n[TRAIN-AI] [HUMAN] [STOP] this mode; just entering whatever invalid pattern (ex: 'exit' or 'q').\n")
  84. key = "K" # continue
  85. while key == "K":
  86. pattern = input("[TRAIN-AI] [HUMAN] [LOOP] [SEARCH] Pattern (ex: attacg): ").upper()
  87. print("\n"+"-"*5 + "\n")
  88. key = search_pattern_on_lesson(pattern)
  89. if key == "Z": # stop
  90. break
  91. def search_pattern_on_lesson(pattern):
  92. valid_pattern = sanitize_dna_pattern(pattern)
  93. if valid_pattern == True:
  94. key = search_pattern_on_local_database(pattern) # search pattern on local database
  95. else:
  96. print("[ERROR] -> Invalid DNA pattern ... [EXITING!]\n")
  97. key = "Z" # stop
  98. return key
  99. def search_pattern_on_local_database(pattern):
  100. f=open(brain_path, 'r')
  101. memory = f.read().replace('\n',' ')
  102. f.close()
  103. patterns_known = 0
  104. if not "'"+pattern+"'" in memory: # always create new patterns
  105. create_new_pattern(pattern) # create new pattern
  106. patterns_known = patterns_known + 1
  107. else:
  108. for k, v in genomes.items(): # create patterns found for new genomes
  109. if k not in memory:
  110. create_new_pattern(pattern) # create new pattern
  111. patterns_known = patterns_known + 1
  112. if patterns_known == 0:
  113. print("[TRAIN-AI] [AUTOMATA] [LOOP] [RESULTS] -ALREADY- [LEARNED!] ... -> [GOING FOR NEXT!]\n")
  114. print("-"*5 + "\n")
  115. key = "K" # continue
  116. return key
  117. def create_initial_seed_file():
  118. f=open(brain_path, 'w')
  119. f.write("")
  120. f.close()
  121. def create_new_pattern(pattern): # append it to brain
  122. valid_pattern = sanitize_dna_pattern(pattern)
  123. if valid_pattern == True:
  124. if pattern not in known_patterns:
  125. known_patterns.append(pattern)
  126. try_pattern_against_all_genomes_by_genome(pattern) # generate repeats dict
  127. patterns_found = 0
  128. for k, v in repeats.items(): # list patterns found to output
  129. print (" *", k +":", "-> ",v,"")
  130. patterns_found = patterns_found + 1
  131. print("")
  132. if patterns_found == 0:
  133. print("[INFO] -> Not any found! ... [EXITING!]\n")
  134. else:
  135. f=open(brain_path, 'a')
  136. f.write(str(repeats)+os.linesep) # add dict as str
  137. f.close()
  138. def teach_ai_automata_mode(): # search patterns by bruteforcing ranges & generate local database
  139. search_patterns_lesson_with_an_ai()
  140. def search_patterns_lesson_with_an_ai():
  141. print("\n"+"-"*30)
  142. print("\n[TRAIN-AI] [AUTOMATA] [STOP] this mode; pressing 'CTRL+z'.\n")
  143. ranges = input("[TRAIN-AI] [AUTOMATA] [SEARCH] Set range (x<y) for pattern deep searching (ex: 2-8): ")
  144. print ("")
  145. valid_range, ranged_permutations = check_for_deep_searching_ranges(ranges)
  146. if str(valid_range) == "OK!":
  147. ranged_ending = False
  148. print("-"*15)
  149. print("\n[TRAIN-AI] [AUTOMATA] [SEARCH] Number of [PERMUTATIONS] estimated: [ "+str(ranged_permutations)+" ]\n")
  150. print("-"*15+"\n")
  151. num_pat = 0
  152. time.sleep(10)
  153. while ranged_ending == False: # try to STOP it using: CTRL-z
  154. try:
  155. pattern, ranged_ending = generate_random_pattern(ranges, ranged_permutations) # generate random seed
  156. if pattern:
  157. num_pat = num_pat + 1
  158. print("[TRAIN-AI] [AUTOMATA] [LOOP] [SEARCH] Generating [RANDOM!] ["+str(num_pat)+"/"+str(ranged_permutations)+"] pattern: [ " + str(pattern) + " ]\n")
  159. if not num_pat == ranged_permutations:
  160. search_pattern_on_lesson(pattern)
  161. else:
  162. search_pattern_on_lesson(pattern)
  163. print("[TRAIN-AI] [AUTOMATA] [RESULTS]: REVIEWED -> [ "+str(ranged_permutations)+" PERMUTATIONS ] ... -> [EXITING!]\n")
  164. ranged_ending = True
  165. except:
  166. pass
  167. else:
  168. print("-"*15+"\n")
  169. print("[TRAIN-AI] [AUTOMATA] [ERROR] -> [INVALID!] Deep Learning [RANGE] -> "+valid_range+" ... [EXITING!]\n")
  170. def generate_random_pattern(ranges, ranged_permutations):
  171. ranged_length = 0
  172. try:
  173. range_low = int(ranges.split("-")[0])
  174. range_high = int(ranges.split("-")[1])
  175. for i in range(range_low, range_high+1):
  176. ranged_length = ranged_length + 1
  177. if ranged_length == ranged_permutations: # all possible variables have been bruteforced/checked! -> exit
  178. pattern = None
  179. ranged_ending = True
  180. return pattern, ranged_ending
  181. else:
  182. ranged_ending = False
  183. seed = [random.randrange(0, 4) for _ in range(i)] # generate "random" seed
  184. if seed not in seeds_checked:
  185. seeds_checked.append(seed)
  186. pattern = ""
  187. for n in seed:
  188. if n == 0:
  189. pattern += "A"
  190. elif n == 1:
  191. pattern += "C"
  192. elif n == 2:
  193. pattern += "T"
  194. else:
  195. pattern += "G"
  196. return pattern, ranged_ending
  197. except:
  198. print("[TRAIN-AI] [AUTOMATA] [ERROR] -> [INVALID!] Deep Learning [RANGE] ... [EXITING!]\n")
  199. pattern = None
  200. ranged_ending = True
  201. return pattern, ranged_ending
  202. def check_for_deep_searching_ranges(ranges):
  203. try:
  204. range_low = ranges.split("-")[0]
  205. range_high = ranges.split("-")[1]
  206. except:
  207. valid_range = "'bad format'"
  208. try:
  209. range_low = int(range_low)
  210. except:
  211. valid_range = "'low range' should be an integer"
  212. try:
  213. range_high = int(range_high)
  214. except:
  215. valid_range = "'high range' should be an integer"
  216. try:
  217. if range_low < range_high:
  218. if range_low > 1: # always range > 1
  219. valid_range = "OK!"
  220. else:
  221. valid_range = "'low range' should be > than 1"
  222. else:
  223. valid_range = "'low range' should be < than 'high range'"
  224. except:
  225. valid_range = "'bad format'"
  226. try:
  227. ranged_permutations = math_ranged_permutations(range_low, range_high)
  228. except:
  229. ranged_permutations = 0
  230. valid_range = "'bad format'"
  231. return valid_range, ranged_permutations
  232. def math_ranged_permutations(range_low, range_high): # calculate ranged_permutations
  233. ranged_permutations = 0
  234. for i in range(range_low, range_high+1):
  235. ranged_permutations = ranged_permutations + (4**i)
  236. return ranged_permutations
  237. def libre_ai(): # show statistics / download new genomes / keep crossing new genomes with local database / search for new patterns (non stop!)
  238. if not os.path.isfile(brain_path):
  239. create_initial_seed_file()
  240. memory = examine_stored_brain_memory()
  241. if memory != "":
  242. #print("[LIBRE-AI] [STOP] this mode; pressing 'CTRL+z'.\n")
  243. libre_ai_show_statistics(memory) # show statistics
  244. def libre_ai_show_statistics(memory):
  245. print("[LIBRE-AI] [REPORTING] [STATISTICS] ... -> [STARTING!]\n")
  246. print("-"*15 + "\n")
  247. total_genomes = 0
  248. total_adenine = 0
  249. total_guanine = 0
  250. total_cytosine = 0
  251. total_thymine = 0
  252. total_any = 0
  253. total_patterns = 0
  254. secuence_length = 0
  255. secuences_length_list = {}
  256. largest = None
  257. largest_len = 0
  258. shortest_len = 0
  259. average = None
  260. shortest = None
  261. for k, v in genomes.items():
  262. secuence_length = len(v)
  263. secuences_length_list[k] = str(secuence_length)
  264. total_genomes = total_genomes + 1
  265. total_adenine = total_adenine + v.count("A")
  266. total_guanine = total_guanine + v.count("G")
  267. total_cytosine = total_cytosine + v.count("C")
  268. total_thymine = total_thymine + v.count("T")
  269. total_any = total_any + v.count("N")
  270. path = genomes_path # genome datasets raw data
  271. l = glob.glob(genomes_path+"*") # black magic!
  272. latest_collection_file = max(l, key=os.path.getctime)
  273. latest_collection_date = time.ctime(os.path.getmtime(latest_collection_file))
  274. total_nucleotids = [total_adenine, total_guanine, total_cytosine, total_thymine, total_any]
  275. num_total_nucleotids = total_adenine + total_guanine + total_cytosine + total_thymine + total_any
  276. nucleotid_more_present = max(total_nucleotids)
  277. print("[LIBRE-AI] [REPORTING] -STORAGE- [STATISTICS]: \n")
  278. extract_storage_sizes()
  279. print(" * [LATEST UPDATE]: '"+str(latest_collection_date)+"'\n")
  280. print(" + File: '"+str(latest_collection_file)+"'\n")
  281. print("-"*5 + "\n")
  282. print("[LIBRE-AI] [REPORTING] -COLLECTION- [STATISTICS]: \n")
  283. extract_total_patterns_learned_from_local(memory)
  284. print("\n"+"-"*5 + "\n")
  285. print("[LIBRE-AI] [REPORTING] -ANALYSIS- [STATISTICS]: \n")
  286. print(" * Total [DNA SECUENCES]: [ "+str(total_genomes)+" ]\n")
  287. largest = 0
  288. largest_pattern_name = []
  289. largest_pattern_size = []
  290. for k, v in secuences_length_list.items():
  291. if int(v) > int(largest):
  292. largest = v
  293. largest_pattern_name.append(k)
  294. largest_pattern_size.append(largest)
  295. for p in largest_pattern_name:
  296. largest_pattern_name = p
  297. for s in largest_pattern_size:
  298. largest_pattern_size = s
  299. print(" + [LARGEST] : "+str(largest_pattern_name)+ " [ "+str(largest_pattern_size)+" bp linear RNA ]")
  300. prev_shortest = None
  301. shortest_pattern_name = []
  302. shortest_pattern_size = []
  303. for k, v in secuences_length_list.items():
  304. if prev_shortest == None:
  305. shortest = v
  306. shortest_pattern_name.append(k)
  307. shortest_pattern_size.append(shortest)
  308. prev_shortest = True
  309. else:
  310. if int(v) < int(shortest):
  311. shortest = v
  312. shortest_pattern_name.append(k)
  313. shortest_pattern_size.append(shortest)
  314. for p in shortest_pattern_name:
  315. shortest_pattern_name = p
  316. for s in shortest_pattern_size:
  317. shortest_pattern_size = s
  318. print(" + [SHORTEST]: "+str(shortest_pattern_name)+ " [ "+str(shortest_pattern_size)+" bp linear RNA ]\n")
  319. print(" * Total [NUCLEOTIDS]: [ "+str(num_total_nucleotids)+" ]\n")
  320. if nucleotid_more_present == total_adenine:
  321. print(" + [A] Adenine : "+str(total_adenine)+" <- [MAX]")
  322. else:
  323. print(" + [A] Adenine : "+str(total_adenine))
  324. if nucleotid_more_present == total_guanine:
  325. print(" + [G] Guanine : "+str(total_guanine)+" <- [MAX]")
  326. else:
  327. print(" + [G] Guanine : "+str(total_guanine))
  328. if nucleotid_more_present == total_cytosine:
  329. print(" + [C] Cytosine : "+str(total_cytosine)+" <- [MAX]")
  330. else:
  331. print(" + [C] Cytosine : "+str(total_cytosine))
  332. if nucleotid_more_present == total_thymine:
  333. print(" + [T] Thymine : "+str(total_thymine)+" <- [MAX]")
  334. else:
  335. print(" + [T] Thymine : "+str(total_thymine))
  336. if total_any > 0:
  337. if nucleotid_more_present == total_any:
  338. print(" + [N] *ANY* : "+str(total_any)+" <- [MAX]\n")
  339. else:
  340. print(" + [N] *ANY* : "+str(total_any)+"\n")
  341. print("-"*5 + "\n")
  342. extract_pattern_most_present_local(memory)
  343. def convert_memory_to_dict(memory): # [index] = genome_name, pattern, num_rep
  344. memory_dict = {}
  345. index = 0
  346. for m in memory:
  347. regex_record = "'(.+?)': (.+?), '(.+?)'" # regex magics! - extract first each record
  348. pattern_record = re.compile(regex_record)
  349. record = re.findall(pattern_record, m)
  350. for r in record: # now extract each field
  351. index = index + 1
  352. name = str(r).split("', '(")[0]
  353. genome_name = str(name).split("'")[1]
  354. repeats = str(r).split("', '(")[1]
  355. genome_repeats = str(repeats).split("',")[0]
  356. pattern = str(repeats).split("',")[1]
  357. genome_pattern = pattern.replace(" ", "")
  358. genome_pattern = genome_pattern.replace("'", "")
  359. genome_pattern = genome_pattern.replace(")", "")
  360. memory_dict[index] = genome_name, genome_pattern, genome_repeats # generate memory_dict!
  361. return memory_dict
  362. def extract_pattern_most_present_local(memory):
  363. memory_dict = convert_memory_to_dict(memory)
  364. if memory_dict:
  365. print("[LIBRE-AI] [REPORTING] -RESEARCHING- [STATISTICS]: \n")
  366. total_genomes = 0
  367. total_patterns = 0
  368. for k, v in genomes.items():
  369. total_genomes = total_genomes + 1
  370. for m in memory:
  371. total_patterns = total_patterns + 1 # counter used for known patterns
  372. max_size_pattern_name, less_size_pattern_name, biggest_pattern_name, biggest_pattern_size, smaller_pattern_name, smaller_pattern_size, total_patterns_all_genomes = extract_patterns_most_found_in_all_genomes(memory_dict)
  373. print(" * Trying -[ "+str(total_patterns)+" ]- [PATTERNS LEARNED!] against -[ "+str(total_genomes)+ " ]- [DNA SECUENCES]:")
  374. print("\n + Total [PATTERNS FOUND!]: [ "+str(total_patterns_all_genomes)+" ]")
  375. print("\n - [LARGEST] : [ "+str(max_size_pattern_name)+" ] -> [ "+str(len(max_size_pattern_name))+" bp linear RNA ]")
  376. print(" - [SHORTEST]: [ "+str(less_size_pattern_name)+" ] -> [ "+str(len(less_size_pattern_name))+" bp linear RNA ]\n")
  377. print(" - [MOST-PRESENT!]: [ "+str(biggest_pattern_name)+" ] -> [ "+str(biggest_pattern_size)+" ] time(s)")
  378. print(" - [LESS-PRESENT!]: [ "+str(smaller_pattern_name)+" ] -> [ "+str(smaller_pattern_size)+" ] time(s)\n")
  379. def extract_patterns_most_found_in_all_genomes(memory_dict):
  380. present_patterns = []
  381. for m, p in memory_dict.items():
  382. pattern = p[1]
  383. if pattern not in present_patterns:
  384. present_patterns.append(pattern)
  385. index = 0 # genome num index
  386. for pattern in present_patterns:
  387. index = index + 1
  388. try_pattern_against_all_genomes_by_pattern(pattern, index)
  389. total_patterns_all_genomes = 0
  390. largest_size_by_pattern = {}
  391. largest_size_by_pattern_index = 0
  392. for k,v in repeats.items():
  393. largest_size_by_pattern_index = largest_size_by_pattern_index + 1
  394. total_patterns_all_genomes = total_patterns_all_genomes + v[2] # total patterns all genomes
  395. largest_size_by_pattern[largest_size_by_pattern_index] = v[0], v[2]
  396. total_patterns_by_pattern = 0
  397. list_total_patterns_by_pattern = {}
  398. for i, v in largest_size_by_pattern.items():
  399. total_patterns_by_pattern = total_patterns_by_pattern + v[1]
  400. list_total_patterns_by_pattern[v[0]] = total_patterns_by_pattern
  401. total_patterns_by_pattern = 0 # reset patterns counter
  402. biggest_pattern_name = None
  403. biggest_pattern_size = 0
  404. smaller_pattern_name = None
  405. smaller_pattern_size = 0
  406. max_size_pattern = 0
  407. for r, z in list_total_patterns_by_pattern.items():
  408. pattern_length = len(r)
  409. if pattern_length > max_size_pattern:
  410. max_size_pattern_name = r
  411. if biggest_pattern_name == None:
  412. biggest_pattern_name = r
  413. smaller_pattern_name = r
  414. biggest_pattern_size = z
  415. smaller_pattern_size = z
  416. less_size_pattern_name = r
  417. less_size_pattern_size = z
  418. else:
  419. if pattern_length < less_size_pattern_size:
  420. less_size_pattern_size = pattern_length
  421. less_size_pattern_name = r
  422. if z > biggest_pattern_size:
  423. biggest_pattern_name = r
  424. biggest_pattern_size = z
  425. else:
  426. if z < smaller_pattern_size:
  427. smaller_pattern_name = r
  428. smaller_pattern_size = z
  429. return max_size_pattern_name, less_size_pattern_name, biggest_pattern_name, biggest_pattern_size, smaller_pattern_name, smaller_pattern_size, total_patterns_all_genomes
  430. def extract_storage_sizes():
  431. total_dataset_size = 0
  432. total_files_size = 0
  433. total_list_size = 0
  434. for file in glob.iglob(genomes_path + '**/*', recursive=True):
  435. if(file.endswith(".genome")):
  436. total_dataset_size = total_dataset_size + len(file)
  437. elif(file.endswith(".in")):
  438. total_brain_size = len(file)
  439. elif(file.endswith(".list")):
  440. total_list_size = len(file)
  441. if total_dataset_size > 0:
  442. total_files_size = int(total_files_size) + int(total_dataset_size)
  443. dataset_s, dataset_size_name = convert_size(total_dataset_size)
  444. total_dataset_size = '%s %s' % (dataset_s,dataset_size_name)
  445. if total_brain_size > 0:
  446. total_files_size = int(total_files_size) + int(total_brain_size)
  447. brain_s, brain_size_name = convert_size(total_brain_size)
  448. total_brain_size = '%s %s' % (brain_s,brain_size_name)
  449. if total_list_size > 0:
  450. total_files_size = int(total_files_size) + int(total_list_size)
  451. list_s, list_size_name = convert_size(total_list_size)
  452. total_list_size = '%s %s' % (list_s,list_size_name)
  453. total_s, total_size_name = convert_size(total_files_size)
  454. total_files_size = '%s %s' % (total_s,total_size_name)
  455. print(" * Total [FILE SIZES]: "+str(total_files_size)+"\n")
  456. if total_dataset_size:
  457. print(" + [DATASET]: "+str(total_dataset_size))
  458. if total_list_size:
  459. print(" + [LIST]: "+str(total_list_size))
  460. if total_brain_size:
  461. print(" + [BRAIN]: "+str(total_brain_size)+"\n")
  462. def extract_total_patterns_learned_from_local(memory):
  463. total_patterns = 0
  464. for m in memory:
  465. total_patterns = total_patterns + 1
  466. print(" * [SETTINGS] Using [MAX. LENGTH] for range [PATTERN] = [ "+str(estimated_max_range_for_library_completed)+" ]\n")
  467. if total_patterns < estimated_patterns_for_library_completed:
  468. library_completion = (total_patterns/estimated_patterns_for_library_completed)*100
  469. print(" + [LIBRARY COMPLETED]: [ "+str('%.20f' % library_completion)+"% ]")
  470. if total_patterns > 0:
  471. print(" + [PATTERNS LEARNED!]: [ "+str(total_patterns)+" / "+str(estimated_patterns_for_library_completed)+" ] \n")
  472. else:
  473. print(" + [PATTERNS LEARNED!]: [ "+str(total_patterns)+" / "+str(estimated_patterns_for_library_completed)+" ]")
  474. else:
  475. total_current_library_completion = (total_patterns/estimated_patterns_for_library_completed)*100
  476. library_completion = 100
  477. print(" + [LIBRARY COMPLETED]: [ "+str(library_completion)+"% ]")
  478. print(" + [CURRENT LIBRARY] : [ "+str('%.00f' % total_current_library_completion)+"% ] -> [ATTENTION!]: INCREASED [MAX. LENGTH] for range [PATTERN] -> REQUIRED!")
  479. if total_patterns > 0:
  480. print(" + [PATTERNS LEARNED!]: [ "+str(total_patterns)+" ]\n")
  481. else:
  482. print(" + [PATTERNS LEARNED!]: [ "+str(total_patterns)+" ]")
  483. pattern_len_1 = 0 # related with [MAX. LENGTH] range
  484. pattern_len_2 = 0
  485. pattern_len_3 = 0
  486. pattern_len_4 = 0
  487. pattern_len_5 = 0
  488. pattern_len_6 = 0
  489. pattern_len_7 = 0
  490. pattern_len_8 = 0
  491. pattern_len_9 = 0
  492. pattern_len_10 = 0
  493. pattern_len_11 = 0
  494. pattern_len_12 = 0
  495. pattern_len_13 = 0
  496. pattern_len_14 = 0
  497. pattern_len_15 = 0
  498. pattern_len_16 = 0
  499. pattern_len_17 = 0
  500. pattern_len_18 = 0
  501. pattern_len_19 = 0
  502. pattern_len_20 = 0
  503. pattern_len_21 = 0
  504. pattern_len_22 = 0
  505. pattern_len_23 = 0
  506. pattern_len_24 = 0
  507. pattern_len_25 = 0
  508. pattern_len_26 = 0
  509. pattern_len_27 = 0
  510. pattern_len_28 = 0
  511. pattern_len_29 = 0
  512. pattern_len_30 = 0
  513. pattern_len_31 = 0
  514. pattern_len_32 = 0
  515. pattern_len_33 = 0
  516. pattern_len_34 = 0
  517. pattern_len_35 = 0
  518. pattern_len_36 = 0
  519. pattern_len_37 = 0
  520. pattern_len_38 = 0
  521. pattern_len_39 = 0
  522. pattern_len_40 = 0
  523. pattern_len_41 = 0
  524. pattern_len_42 = 0
  525. pattern_len_43 = 0
  526. pattern_len_44 = 0
  527. pattern_len_45 = 0
  528. pattern_len_46 = 0
  529. pattern_len_47 = 0
  530. pattern_len_48 = 0
  531. pattern_len_49 = 0
  532. pattern_len_50 = 0
  533. for m in memory:
  534. try:
  535. pattern_len = m.split(", '")[1]
  536. pattern_len = pattern_len.split("')")[0]
  537. pattern_len = len(pattern_len)
  538. except:
  539. pattern_len = 0 # discard!
  540. if pattern_len == 1:
  541. pattern_len_1 = pattern_len_1 + 1
  542. elif pattern_len == 2:
  543. pattern_len_2 = pattern_len_2 + 1
  544. elif pattern_len == 3:
  545. pattern_len_3 = pattern_len_3 + 1
  546. elif pattern_len == 4:
  547. pattern_len_4 = pattern_len_4 + 1
  548. elif pattern_len == 5:
  549. pattern_len_5 = pattern_len_5 + 1
  550. elif pattern_len == 6:
  551. pattern_len_6 = pattern_len_6 + 1
  552. elif pattern_len == 7:
  553. pattern_len_7 = pattern_len_7 + 1
  554. elif pattern_len == 8:
  555. pattern_len_8 = pattern_len_8 + 1
  556. elif pattern_len == 9:
  557. pattern_len_9 = pattern_len_9 + 1
  558. elif pattern_len == 10:
  559. pattern_len_10 = pattern_len_10 + 1
  560. elif pattern_len == 11:
  561. pattern_len_11 = pattern_len_11 + 1
  562. elif pattern_len == 12:
  563. pattern_len_12 = pattern_len_12 + 1
  564. elif pattern_len == 13:
  565. pattern_len_13 = pattern_len_13 + 1
  566. elif pattern_len == 14:
  567. pattern_len_14 = pattern_len_14 + 1
  568. elif pattern_len == 15:
  569. pattern_len_15 = pattern_len_15 + 1
  570. elif pattern_len == 16:
  571. pattern_len_16 = pattern_len_16 + 1
  572. elif pattern_len == 17:
  573. pattern_len_17 = pattern_len_17 + 1
  574. elif pattern_len == 18:
  575. pattern_len_18 = pattern_len_18 + 1
  576. elif pattern_len == 19:
  577. pattern_len_19 = pattern_len_19 + 1
  578. elif pattern_len == 20:
  579. pattern_len_20 = pattern_len_20 + 1
  580. elif pattern_len == 21:
  581. pattern_len_21 = pattern_len_21 + 1
  582. elif pattern_len == 22:
  583. pattern_len_22 = pattern_len_22 + 1
  584. elif pattern_len == 23:
  585. pattern_len_23 = pattern_len_23 + 1
  586. elif pattern_len == 24:
  587. pattern_len_24 = pattern_len_24 + 1
  588. elif pattern_len == 25:
  589. pattern_len_25 = pattern_len_25 + 1
  590. elif pattern_len == 26:
  591. pattern_len_26 = pattern_len_26 + 1
  592. elif pattern_len == 27:
  593. pattern_len_27 = pattern_len_27 + 1
  594. elif pattern_len == 28:
  595. pattern_len_28 = pattern_len_28 + 1
  596. elif pattern_len == 29:
  597. pattern_len_29 = pattern_len_29 + 1
  598. elif pattern_len == 30:
  599. pattern_len_30 = pattern_len_30 + 1
  600. elif pattern_len == 31:
  601. pattern_len_31 = pattern_len_31 + 1
  602. elif pattern_len == 32:
  603. pattern_len_32 = pattern_len_32 + 1
  604. elif pattern_len == 33:
  605. pattern_len_33 = pattern_len_33 + 1
  606. elif pattern_len == 34:
  607. pattern_len_34 = pattern_len_34 + 1
  608. elif pattern_len == 35:
  609. pattern_len_35 = pattern_len_35 + 1
  610. elif pattern_len == 36:
  611. pattern_len_36 = pattern_len_36 + 1
  612. elif pattern_len == 37:
  613. pattern_len_37 = pattern_len_37 + 1
  614. elif pattern_len == 38:
  615. pattern_len_38 = pattern_len_38 + 1
  616. elif pattern_len == 39:
  617. pattern_len_39 = pattern_len_39 + 1
  618. elif pattern_len == 40:
  619. pattern_len_40 = pattern_len_40 + 1
  620. elif pattern_len == 41:
  621. pattern_len_41 = pattern_len_41 + 1
  622. elif pattern_len == 42:
  623. pattern_len_42 = pattern_len_42 + 1
  624. elif pattern_len == 43:
  625. pattern_len_43 = pattern_len_43 + 1
  626. elif pattern_len == 44:
  627. pattern_len_44 = pattern_len_44 + 1
  628. elif pattern_len == 45:
  629. pattern_len_45 = pattern_len_45 + 1
  630. elif pattern_len == 46:
  631. pattern_len_46 = pattern_len_46 + 1
  632. elif pattern_len == 47:
  633. pattern_len_47 = pattern_len_47 + 1
  634. elif pattern_len == 48:
  635. pattern_len_48 = pattern_len_48 + 1
  636. elif pattern_len == 49:
  637. pattern_len_49 = pattern_len_49 + 1
  638. elif pattern_len == 50:
  639. pattern_len_50 = pattern_len_50 + 1
  640. else:
  641. pass
  642. if pattern_len_1 < 101:
  643. progression_len_1 = pattern_len_1 * "*"
  644. else:
  645. progression_len_1 = 100 * "*+"+str(pattern_len_1-100)
  646. if pattern_len_2 < 101:
  647. progression_len_2 = pattern_len_2 * "*"
  648. else:
  649. progression_len_2 = 100 * "*+"+str(pattern_len_2-100)
  650. if pattern_len_3 < 101:
  651. progression_len_3 = pattern_len_3 * "*"
  652. else:
  653. progression_len_3 = 100 * "*+"+str(pattern_len_3-100)
  654. if pattern_len_4 < 101:
  655. progression_len_4 = pattern_len_4 * "*"
  656. else:
  657. progression_len_4 = 100 * "*"+" 100+"+str(pattern_len_4-100)
  658. if pattern_len_5 < 101:
  659. progression_len_5 = pattern_len_5 * "*"
  660. else:
  661. progression_len_5 = 100 * "*+"+str(pattern_len_5-100)
  662. if pattern_len_6 < 101:
  663. progression_len_6 = pattern_len_6 * "*"
  664. else:
  665. progression_len_6 = 100 * "*+"+str(pattern_len_6-100)
  666. if pattern_len_7 < 101:
  667. progression_len_7 = pattern_len_7 * "*"
  668. else:
  669. progression_len_7 = 100 * "*+"+str(pattern_len_7-100)
  670. if pattern_len_8 < 101:
  671. progression_len_8 = pattern_len_8 * "*"
  672. else:
  673. progression_len_8 = 100 * "*+"+str(pattern_len_8-100)
  674. if pattern_len_9 < 101:
  675. progression_len_9 = pattern_len_9 * "*"
  676. else:
  677. progression_len_9 = 100 * "*+"+str(pattern_len_9-100)
  678. if pattern_len_10 < 101:
  679. progression_len_10 = pattern_len_10 * "*"
  680. else:
  681. progression_len_10 = 100 * "*+"+str(pattern_len_10-100)
  682. if pattern_len_11 < 101:
  683. progression_len_11 = pattern_len_11 * "*"
  684. else:
  685. progression_len_11 = 100 * "*+"+str(pattern_len_11-100)
  686. if pattern_len_12 < 101:
  687. progression_len_12 = pattern_len_12 * "*"
  688. else:
  689. progression_len_12 = 100 * "*+"+str(pattern_len_12-100)
  690. if pattern_len_13 < 101:
  691. progression_len_13 = pattern_len_13 * "*"
  692. else:
  693. progression_len_13 = 100 * "*+"+str(pattern_len_13-100)
  694. if pattern_len_14 < 101:
  695. progression_len_14 = pattern_len_14 * "*"
  696. else:
  697. progression_len_14 = 100 * "*+"+str(pattern_len_14-100)
  698. if pattern_len_15 < 101:
  699. progression_len_15 = pattern_len_15 * "*"
  700. else:
  701. progression_len_15 = 100 * "*+"+str(pattern_len_15-100)
  702. if pattern_len_16 < 101:
  703. progression_len_16 = pattern_len_16 * "*"
  704. else:
  705. progression_len_16 = 100 * "*+"+str(pattern_len_16-100)
  706. if pattern_len_17 < 101:
  707. progression_len_17 = pattern_len_17 * "*"
  708. else:
  709. progression_len_17 = 100 * "*+"+str(pattern_len_17-100)
  710. if pattern_len_18 < 101:
  711. progression_len_18 = pattern_len_18 * "*"
  712. else:
  713. progression_len_18 = 100 * "*+"+str(pattern_len_18-100)
  714. if pattern_len_19 < 101:
  715. progression_len_19 = pattern_len_19 * "*"
  716. else:
  717. progression_len_19 = 100 * "*+"+str(pattern_len_19-100)
  718. if pattern_len_20 < 101:
  719. progression_len_20 = pattern_len_20 * "*"
  720. else:
  721. progression_len_20 = 100 * "*+"+str(pattern_len_20-100)
  722. if pattern_len_21 < 101:
  723. progression_len_21 = pattern_len_21 * "*"
  724. else:
  725. progression_len_21 = 100 * "*+"+str(pattern_len_21-100)
  726. if pattern_len_22 < 101:
  727. progression_len_22 = pattern_len_22 * "*"
  728. else:
  729. progression_len_22 = 100 * "*+"+str(pattern_len_22-100)
  730. if pattern_len_23 < 101:
  731. progression_len_23 = pattern_len_23 * "*"
  732. else:
  733. progression_len_23 = 100 * "*+"+str(pattern_len_23-100)
  734. if pattern_len_24 < 101:
  735. progression_len_24 = pattern_len_24 * "*"
  736. else:
  737. progression_len_24 = 100 * "*+"+str(pattern_len_24-100)
  738. if pattern_len_25 < 101:
  739. progression_len_25 = pattern_len_25 * "*"
  740. else:
  741. progression_len_25 = 100 * "*+"+str(pattern_len_25-100)
  742. if pattern_len_26 < 101:
  743. progression_len_26 = pattern_len_26 * "*"
  744. else:
  745. progression_len_26 = 100 * "*+"+str(pattern_len_26-100)
  746. if pattern_len_27 < 101:
  747. progression_len_27 = pattern_len_27 * "*"
  748. else:
  749. progression_len_27 = 100 * "*+"+str(pattern_len_27-100)
  750. if pattern_len_28 < 101:
  751. progression_len_28 = pattern_len_28 * "*"
  752. else:
  753. progression_len_28 = 100 * "*+"+str(pattern_len_28-100)
  754. if pattern_len_29 < 101:
  755. progression_len_29 = pattern_len_29 * "*"
  756. else:
  757. progression_len_29 = 100 * "*+"+str(pattern_len_29-100)
  758. if pattern_len_30 < 101:
  759. progression_len_30 = pattern_len_30 * "*"
  760. else:
  761. progression_len_30 = 100 * "*+"+str(pattern_len_30-100)
  762. if pattern_len_31 < 101:
  763. progression_len_31 = pattern_len_31 * "*"
  764. else:
  765. progression_len_31 = 100 * "*+"+str(pattern_len_31-100)
  766. if pattern_len_32 < 101:
  767. progression_len_32 = pattern_len_32 * "*"
  768. else:
  769. progression_len_32 = 100 * "*+"+str(pattern_len_32-100)
  770. if pattern_len_33 < 101:
  771. progression_len_33 = pattern_len_33 * "*"
  772. else:
  773. progression_len_33 = 100 * "*+"+str(pattern_len_33-100)
  774. if pattern_len_34 < 101:
  775. progression_len_34 = pattern_len_34 * "*"
  776. else:
  777. progression_len_34 = 100 * "*+"+str(pattern_len_34-100)
  778. if pattern_len_35 < 101:
  779. progression_len_35 = pattern_len_35 * "*"
  780. else:
  781. progression_len_35 = 100 * "*+"+str(pattern_len_35-100)
  782. if pattern_len_36 < 101:
  783. progression_len_36 = pattern_len_36 * "*"
  784. else:
  785. progression_len_36 = 100 * "*+"+str(pattern_len_36-100)
  786. if pattern_len_37 < 101:
  787. progression_len_37 = pattern_len_37 * "*"
  788. else:
  789. progression_len_37 = 100 * "*+"+str(pattern_len_37-100)
  790. if pattern_len_38 < 101:
  791. progression_len_38 = pattern_len_38 * "*"
  792. else:
  793. progression_len_38 = 100 * "*+"+str(pattern_len_38-100)
  794. if pattern_len_39 < 101:
  795. progression_len_39 = pattern_len_39 * "*"
  796. else:
  797. progression_len_39 = 100 * "*+"+str(pattern_len_39-100)
  798. if pattern_len_40 < 101:
  799. progression_len_40 = pattern_len_40 * "*"
  800. else:
  801. progression_len_40 = 100 * "*+"+str(pattern_len_40-100)
  802. if pattern_len_41 < 101:
  803. progression_len_41 = pattern_len_41 * "*"
  804. else:
  805. progression_len_41 = 100 * "*+"+str(pattern_len_41-100)
  806. if pattern_len_42 < 101:
  807. progression_len_42 = pattern_len_42 * "*"
  808. else:
  809. progression_len_42 = 100 * "*+"+str(pattern_len_42-100)
  810. if pattern_len_43 < 101:
  811. progression_len_43 = pattern_len_43 * "*"
  812. else:
  813. progression_len_43 = 100 * "*+"+str(pattern_len_43-100)
  814. if pattern_len_44 < 101:
  815. progression_len_44 = pattern_len_44 * "*"
  816. else:
  817. progression_len_44 = 100 * "*+"+str(pattern_len_44-100)
  818. if pattern_len_45 < 101:
  819. progression_len_45 = pattern_len_45 * "*"
  820. else:
  821. progression_len_45 = 100 * "*+"+str(pattern_len_45-100)
  822. if pattern_len_46 < 101:
  823. progression_len_46 = pattern_len_46 * "*"
  824. else:
  825. progression_len_46 = 100 * "*+"+str(pattern_len_46-100)
  826. if pattern_len_47 < 101:
  827. progression_len_47 = pattern_len_47 * "*"
  828. else:
  829. progression_len_47 = 100 * "*+"+str(pattern_len_47-100)
  830. if pattern_len_48 < 101:
  831. progression_len_48 = pattern_len_48 * "*"
  832. else:
  833. progression_len_48 = 100 * "*+"+str(pattern_len_48-100)
  834. if pattern_len_49 < 101:
  835. progression_len_49 = pattern_len_49 * "*"
  836. else:
  837. progression_len_49 = 100 * "*+"+str(pattern_len_49-100)
  838. if pattern_len_50 < 101:
  839. progression_len_50 = pattern_len_50 * "*"
  840. else:
  841. progression_len_50 = 100 * "*+"+str(pattern_len_50-100)
  842. if pattern_len_1 > 0:
  843. print(" - [length = 1] | "+progression_len_1 + " [ "+str(pattern_len_1)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  844. if pattern_len_2 > 0:
  845. print(" - [length = 2] | "+progression_len_2 + " [ "+str(pattern_len_2)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  846. if pattern_len_3 > 0:
  847. print(" - [length = 3] | "+progression_len_3 + " [ "+str(pattern_len_3)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  848. if pattern_len_4 > 0:
  849. print(" - [length = 4] | "+progression_len_4 + " [ "+str(pattern_len_4)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  850. if pattern_len_5 > 0:
  851. print(" - [length = 5] | "+progression_len_5 + " [ "+str(pattern_len_5)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  852. if pattern_len_6 > 0:
  853. print(" - [length = 6] | "+progression_len_6 + " [ "+str(pattern_len_6)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  854. if pattern_len_7 > 0:
  855. print(" - [length = 7] | "+progression_len_7 + " [ "+str(pattern_len_7)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  856. if pattern_len_8 > 0:
  857. print(" - [length = 8] | "+progression_len_8 + " [ "+str(pattern_len_8)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  858. if pattern_len_9 > 0:
  859. print(" - [length = 9] | "+progression_len_9 + " [ "+str(pattern_len_9)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  860. if pattern_len_10 > 0:
  861. print(" - [length = 10] | "+progression_len_10 + " [ "+str(pattern_len_10)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  862. if pattern_len_11 > 0:
  863. print(" - [length = 11] | "+progression_len_11 + " [ "+str(pattern_len_11)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  864. if pattern_len_12 > 0:
  865. print(" - [length = 12] | "+progression_len_12 + " [ "+str(pattern_len_12)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  866. if pattern_len_13 > 0:
  867. print(" - [length = 13] | "+progression_len_13 + " [ "+str(pattern_len_13)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  868. if pattern_len_14 > 0:
  869. print(" - [length = 14] | "+progression_len_14 + " [ "+str(pattern_len_14)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  870. if pattern_len_15 > 0:
  871. print(" - [length = 15] | "+progression_len_15 + " [ "+str(pattern_len_15)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  872. if pattern_len_16 > 0:
  873. print(" - [length = 16] | "+progression_len_16 + " [ "+str(pattern_len_16)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  874. if pattern_len_17 > 0:
  875. print(" - [length = 17] | "+progression_len_17 + " [ "+str(pattern_len_17)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  876. if pattern_len_18 > 0:
  877. print(" - [length = 18] | "+progression_len_18 + " [ "+str(pattern_len_18)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  878. if pattern_len_19 > 0:
  879. print(" - [length = 19] | "+progression_len_19 + " [ "+str(pattern_len_19)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  880. if pattern_len_20 > 0:
  881. print(" - [length = 20] | "+progression_len_20 + " [ "+str(pattern_len_20)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  882. if pattern_len_21 > 0:
  883. print(" - [length = 21] | "+progression_len_21 + " [ "+str(pattern_len_21)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  884. if pattern_len_22 > 0:
  885. print(" - [length = 22] | "+progression_len_22 + " [ "+str(pattern_len_22)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  886. if pattern_len_23 > 0:
  887. print(" - [length = 23] | "+progression_len_23 + " [ "+str(pattern_len_23)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  888. if pattern_len_24 > 0:
  889. print(" - [length = 24] | "+progression_len_24 + " [ "+str(pattern_len_24)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  890. if pattern_len_25 > 0:
  891. print(" - [length = 25] | "+progression_len_25 + " [ "+str(pattern_len_25)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  892. if pattern_len_26 > 0:
  893. print(" - [length = 26] | "+progression_len_26 + " [ "+str(pattern_len_26)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  894. if pattern_len_27 > 0:
  895. print(" - [length = 27] | "+progression_len_27 + " [ "+str(pattern_len_27)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  896. if pattern_len_28 > 0:
  897. print(" - [length = 28] | "+progression_len_28 + " [ "+str(pattern_len_28)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  898. if pattern_len_29 > 0:
  899. print(" - [length = 29] | "+progression_len_29 + " [ "+str(pattern_len_29)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  900. if pattern_len_30 > 0:
  901. print(" - [length => 30] | "+progression_len_30 + " [ "+str(pattern_len_30)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  902. if pattern_len_31 > 0:
  903. print(" - [length = 11] | "+progression_len_31 + " [ "+str(pattern_len_31)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  904. if pattern_len_32 > 0:
  905. print(" - [length = 12] | "+progression_len_32 + " [ "+str(pattern_len_32)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  906. if pattern_len_33 > 0:
  907. print(" - [length = 13] | "+progression_len_33 + " [ "+str(pattern_len_33)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  908. if pattern_len_34 > 0:
  909. print(" - [length = 14] | "+progression_len_34 + " [ "+str(pattern_len_34)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  910. if pattern_len_35 > 0:
  911. print(" - [length = 15] | "+progression_len_35 + " [ "+str(pattern_len_35)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  912. if pattern_len_36 > 0:
  913. print(" - [length = 16] | "+progression_len_36 + " [ "+str(pattern_len_36)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  914. if pattern_len_37 > 0:
  915. print(" - [length = 17] | "+progression_len_37 + " [ "+str(pattern_len_37)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  916. if pattern_len_38 > 0:
  917. print(" - [length = 18] | "+progression_len_38 + " [ "+str(pattern_len_38)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  918. if pattern_len_39 > 0:
  919. print(" - [length = 19] | "+progression_len_39 + " [ "+str(pattern_len_39)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  920. if pattern_len_40 > 0:
  921. print(" - [length = 20] | "+progression_len_30 + " [ "+str(pattern_len_40)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  922. if pattern_len_41 > 0:
  923. print(" - [length = 21] | "+progression_len_41 + " [ "+str(pattern_len_41)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  924. if pattern_len_42 > 0:
  925. print(" - [length = 22] | "+progression_len_42 + " [ "+str(pattern_len_42)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  926. if pattern_len_43 > 0:
  927. print(" - [length = 23] | "+progression_len_43 + " [ "+str(pattern_len_43)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  928. if pattern_len_44 > 0:
  929. print(" - [length = 24] | "+progression_len_44 + " [ "+str(pattern_len_44)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  930. if pattern_len_45 > 0:
  931. print(" - [length = 25] | "+progression_len_45 + " [ "+str(pattern_len_45)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  932. if pattern_len_46 > 0:
  933. print(" - [length = 26] | "+progression_len_46 + " [ "+str(pattern_len_46)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  934. if pattern_len_47 > 0:
  935. print(" - [length = 27] | "+progression_len_47 + " [ "+str(pattern_len_47)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  936. if pattern_len_48 > 0:
  937. print(" - [length = 28] | "+progression_len_48 + " [ "+str(pattern_len_48)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  938. if pattern_len_49 > 0:
  939. print(" - [length = 29] | "+progression_len_49 + " [ "+str(pattern_len_49)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  940. if pattern_len_50 > 0:
  941. print(" - [length => 30] | "+progression_len_50 + " [ "+str(pattern_len_50)+" / "+str(estimated_quantity_per_pattern_for_library_completed)+" ]")
  942. return memory
  943. def list_genomes_on_database():
  944. print("[LIST] [REPORTING] [DNA SECUENCES] ... -> [STARTING!]\n")
  945. print("-"*15 + "\n")
  946. f=open(genomes_list_path, 'w')
  947. for k, v in genomes.items():
  948. print ("*"+str(k)+ "-> [ "+str(len(v))+" bp linear RNA ]")
  949. print (" + [A] Adenine :", str(v.count("A")))
  950. print (" + [G] Guanine :", str(v.count("G")))
  951. print (" + [C] Cytosine :", str(v.count("C")))
  952. print (" + [T] Thymine :", str(v.count("T")))
  953. f.write(str("*"+ str(k)+ " -> [ "+str(len(v))+"bp linear RNA ]\n"))
  954. f.write(str(" + [A] Adenine : " + str(v.count("A"))+"\n"))
  955. f.write(str(" + [G] Guanine : " + str(v.count("G"))+"\n"))
  956. f.write(str(" + [C] Cytosine : " + str(v.count("C"))+"\n"))
  957. f.write(str(" + [T] Thymine : " + str(v.count("T"))+"\n"))
  958. if v.count("N") > 0:
  959. print (" + [N] *ANY* :", str(v.count("N")))
  960. f.write(str(" + [N] *ANY* : "+ str(v.count("N"))+"\n"))
  961. print ("")
  962. f.write("\n")
  963. print("-"*15 + "\n")
  964. print ("[LIST] [INFO] [SAVED!] at: '"+str(genomes_list_path)+"'... -> [EXITING!]\n")
  965. f.close()
  966. def examine_stored_brain_memory():
  967. memory = [] # list used as hot-memory
  968. f=open(brain_path, 'r')
  969. for line in f.readlines():
  970. if line not in memory:
  971. memory.append(line)
  972. f.close()
  973. if memory == "": # first time run!
  974. print ("[LIBRE-AI] [INFO] Not any [BRAIN] present ... -> [BUILDING ONE!]\n")
  975. print("-"*15 + "\n")
  976. for i in range(2, 11+1):
  977. seed = [random.randrange(0, 4) for _ in range(i)] # generate "static" genesis seed
  978. if seed not in seeds_checked:
  979. seeds_checked.append(seed)
  980. pattern = ""
  981. for n in seed:
  982. if n == 0:
  983. pattern += "A"
  984. elif n == 1:
  985. pattern += "C"
  986. elif n == 2:
  987. pattern += "T"
  988. else:
  989. pattern += "G"
  990. print("[LIBRE-AI] [SEARCH] Generating [RANDOM] pattern: " + str(pattern) + "\n")
  991. create_new_pattern(pattern) # create new pattern
  992. print("-"*15 + "\n")
  993. print ("[LIBRE-AI] [INFO] A new [BRAIN] has been created !!! ... -> [ADVANCING!]\n")
  994. f=open(brain_path, 'r')
  995. memory = f.read().replace('\n',' ')
  996. f.close()
  997. return memory
  998. def print_banner():
  999. print("\n"+"="*50)
  1000. print(" ____ _ _ _ _ ")
  1001. print("| _ \(_) __ _| \ | | / \ ")
  1002. print("| | | | |/ _` | \| | / _ \ ")
  1003. print("| |_| | | (_| | |\ |/ ___ \ ")
  1004. print("|____/|_|\__,_|_| \_/_/ \_\ by psy")
  1005. print('\n"Search and Recognize patterns in DNA sequences"')
  1006. print("\n"+"="*50)
  1007. print("+ GENOMES DETECTED:", str(num_files))
  1008. print("="*50)
  1009. print("\n"+"-"*15+"\n")
  1010. print(" * VERSION: ")
  1011. print(" + "+VERSION+" - (rev:"+RELEASE+")")
  1012. print("\n * SOURCES:")
  1013. print(" + "+SOURCE1)
  1014. print(" + "+SOURCE2)
  1015. print("\n * CONTACT: ")
  1016. print(" + "+CONTACT+"\n")
  1017. print("-"*15+"\n")
  1018. print("="*50)
  1019. # sub_init #
  1020. num_files=0
  1021. for file in glob.iglob(genomes_path + '**/*', recursive=True):
  1022. if(file.endswith(".genome")):
  1023. num_files = num_files + 1
  1024. f=open(file, 'r')
  1025. genome = f.read().replace('\n',' ')
  1026. genomes[file.replace("datasets/","")] = genome.upper() # add genome to main dict
  1027. f.close()
  1028. print_banner() # show banner
  1029. option = input("\n+ CHOOSE: (S)earch, (L)ist, (T)rain or (R)eport: ").upper()
  1030. print("")
  1031. print("="*50+"\n")
  1032. if option == "S": # search pattern
  1033. search_pattern_with_human()
  1034. elif option == "L": # list genomes
  1035. list_genomes_on_database()
  1036. elif option == "T": # teach AI
  1037. teach_ai()
  1038. else: # libre AI
  1039. libre_ai()
  1040. print ("="*50+"\n")