# prisma_12d_db_copier.py
  1. import datetime
  2. import pandas as pd
  3. import pymongo
  4. from config_info.config import *
  5. db_client = pymongo.MongoClient(DB_URL)
  6. prisma_db = db_client["prisma-32_db"]
  7. def t_file_converter(path, cluster, date_):
  8. """Converter for PRISMA t-files"""
  9. with open(f'{path}{cluster}t_{date_.month:02}-{date_.day:02}.{date_.year - 2000:02}') as f:
  10. raw_data = f.readlines()
  11. raw_data = [line.rstrip() for line in raw_data]
  12. # Убираем переводы строки
  13. event_list = []
  14. main_list = []
  15. sep = 0
  16. for i in range(len(raw_data)):
  17. if raw_data[i] == '*#*':
  18. main_list.append(raw_data[sep].split(' '))
  19. event_list.append(raw_data[sep + 1:i])
  20. sep = i + 1
  21. unit_delay = []
  22. for item in event_list:
  23. delay_per_event = []
  24. for line in item:
  25. step = line.split(' ')
  26. for i in range(1, 17):
  27. if int(step[i]) != 0:
  28. delay_per_event.append([round(int(step[0]) * (10 ** (-4)), 4), i, int(step[i])])
  29. unit_delay.append(delay_per_event)
  30. plural_data_list = []
  31. for i in unit_delay:
  32. time_list = []
  33. detector_list = []
  34. neut_quantity_list = []
  35. for j in i:
  36. time_list.append(j[0])
  37. detector_list.append(j[1])
  38. neut_quantity_list.append(j[2])
  39. plural_data_list.append([time_list, detector_list, neut_quantity_list])
  40. for i in range(len(main_list)):
  41. main_list[i].extend(plural_data_list[i])
  42. t_file_df = pd.DataFrame(main_list,
  43. columns=['time', 'number', 'sum_n', 'trigger', 'time_delay', 'detectors', 'n_per_step'])
  44. t_file_df = t_file_df.astype({"time": float, "number": int, "sum_n": int, "trigger": int})
  45. return t_file_df
  46. def prisma_12d_past_data_copier(date, cluster):
  47. if cluster == 1:
  48. n_file_template = f"n_{date.month:02}-{date.day:02}.{date.year - 2000:02}"
  49. n_file = pd.read_csv(PATH_TO_PRISMA_1_DATA + n_file_template, sep=' ', skipinitialspace=True, header=None)
  50. n_file = n_file.dropna(axis=1, how='all')
  51. n_file.columns = ['time', 'number', 'sum_n', 'trigger'] + list(range(32))
  52. print("Data file: {}".format(PATH_TO_PRISMA_1_DATA + n_file_template))
  53. t_file = t_file_converter(PATH_TO_PRISMA_1_T_FILES, "", date)
  54. n_file = n_file.merge(t_file)
  55. fix_end_time_series = n_file['time'].lt(n_file['time'].shift())
  56. bad_end_time_index = fix_end_time_series[fix_end_time_series == True].index
  57. else:
  58. n_file_template = f"2n_{date.month:02}-{date.day:02}.{date.year - 2000:02}"
  59. n_file = pd.read_csv(PATH_TO_PRISMA_2_DATA + n_file_template, sep=' ', skipinitialspace=True, header=None)
  60. n_file = n_file.dropna(axis=1, how='all')
  61. n_file.columns = ['time', 'number', 'sum_n', 'trigger'] + list(range(32))
  62. print("Data file: {}".format(PATH_TO_PRISMA_2_DATA + n_file_template))
  63. t_file = t_file_converter(PATH_TO_PRISMA_2_T_FILES, 2, date)
  64. n_file = n_file.merge(t_file)
  65. fix_end_time_series = n_file['time'].lt(n_file['time'].shift())
  66. bad_end_time_index = fix_end_time_series[fix_end_time_series == True].index
  67. for index in range(len(n_file.index)):
  68. params = list(n_file.iloc[index])
  69. event_time = str(datetime.timedelta(seconds=params[0]))
  70. # event_date = (datetime.timedelta(seconds=params[0]) + datetime.timedelta(hours=3)).date()
  71. event_datetime = datetime.datetime(date.year, date.month, date.day, int(event_time.split(':')[0]),
  72. int(event_time.split(':')[1]), int(float(event_time.split(':')[2])),
  73. int(round(
  74. float(event_time.split(':')[2]) - int(float(event_time.split(':')[2])),
  75. 2) * 10 ** 6)) - datetime.timedelta(hours=4)
  76. if index >= bad_end_time_index:
  77. new_date = date + datetime.timedelta(days=1)
  78. event_datetime = datetime.datetime(new_date.year, new_date.month, new_date.day, int(event_time.split(':')[0]),
  79. int(event_time.split(':')[1]), int(float(event_time.split(':')[2])),
  80. int(round(
  81. float(event_time.split(':')[2]) - int(float(event_time.split(':')[2])),
  82. 2) * 10 ** 6)) - datetime.timedelta(hours=4)
  83. trigger = params[3]
  84. amp = [int(params[j]) for j in range(4, 36, 2)]
  85. n = [int(params[j]) for j in range(5, 37, 2)]
  86. n_time_delay = params[36]
  87. detector = params[37]
  88. n_in_step = params[38]
  89. det_params = {}
  90. for i in range(1, 17):
  91. n_time_delay_by_det = []
  92. detector_index = [ind for ind, v in enumerate(detector) if v == i]
  93. for j in detector_index:
  94. n_time_delay_by_det.extend([n_time_delay[j]] * int(n_in_step[j]))
  95. # В БД будут оставаться пустые списки при нуле нейтронов, надо ли это фиксить?
  96. det_params[f'det_{i:02}'] = {
  97. 'amplitude': amp[i - 1],
  98. 'neutrons': n[i - 1],
  99. 'time_delay': n_time_delay_by_det
  100. }
  101. try:
  102. new_record = {
  103. '_id': f'{event_datetime.date()}_{cluster:02}_12d_{int(event_datetime.hour):02}:' +
  104. f'{int(event_datetime.minute):02}:{int(event_datetime.second):02}.' +
  105. f'{str(event_datetime.microsecond)[:3]}.000.000',
  106. 'time_ns': int((int(event_datetime.hour) * 1440 + int(event_datetime.minute) * 60 + int(
  107. event_datetime.second)) * 10e8 + int(event_datetime.microsecond) * 1000),
  108. 'cluster': cluster,
  109. 'trigger': int(trigger),
  110. 'detectors': det_params
  111. }
  112. collection_prisma = prisma_db[f'{str(event_datetime.date())}_12d']
  113. ins_result = collection_prisma.insert_one(new_record)
  114. print(f'Copied - {ins_result.inserted_id}')
  115. except pymongo.errors.DuplicateKeyError:
  116. print(f'Ошибка - {event_datetime.date()}-{event_time}')
  117. # Press the green button in the gutter to run the script.
  118. if __name__ == '__main__':
  119. cluster_1 = 1
  120. cluster_2 = 2
  121. date_time_start = datetime.date(2021, 12, 1) # посмотреть почему не собирается конец дня 2018-04-22
  122. date_time_stop = datetime.date(2021, 12, 1)
  123. LIST_OF_DATES = [(date_time_start + datetime.timedelta(days=i)) for i in
  124. range((date_time_stop - date_time_start).days + 1)]
  125. for date in LIST_OF_DATES:
  126. try:
  127. prisma_12d_past_data_copier(date, cluster_1)
  128. except FileNotFoundError:
  129. print(f'файла {cluster_1}-го кластера от {date} не существует')
  130. try:
  131. prisma_12d_past_data_copier(date, cluster_2)
  132. except FileNotFoundError:
  133. print(f'файла {cluster_2}-го кластера от {date} не существует')
  134. print('test')