prisma_7d_db_copier.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. import datetime
  2. import pandas as pd
  3. import pymongo
  4. from config_info.config import *
  5. db_client = pymongo.MongoClient(DB_URL)
  6. prisma_db = db_client["prisma-32_db"]
  7. def prisma_7d_past_data_copier(date, cluster):
  8. if cluster == 1:
  9. n7_file_template = f"n7_{date.month:02}-{date.day:02}.{date.year - 2000:02}"
  10. n7_file = pd.read_csv(PATH_TO_PRISMA_1_7d_DATA + n7_file_template, sep=' ', skipinitialspace=True, header=None)
  11. n7_file = n7_file.dropna(axis=1, how='all')
  12. for i in range(len(n7_file[0])):
  13. if type(n7_file[0][i]) is str:
  14. n7_file.loc[i,0] = float('.'.join(n7_file.loc[i,0].split(',')))
  15. print("Data file: {}".format(PATH_TO_PRISMA_1_7d_DATA + n7_file_template))
  16. fix_end_time_series = n7_file[0].lt(n7_file[0].shift())
  17. bad_end_time_index = fix_end_time_series[fix_end_time_series == True].index
  18. else:
  19. n7_file_template = f"2n7_{date.month:02}-{date.day:02}.{date.year - 2000:02}"
  20. n7_file = pd.read_csv(PATH_TO_PRISMA_2_7d_DATA + n7_file_template, sep=' ', skipinitialspace=True, header=None)
  21. n7_file = n7_file.dropna(axis=1, how='all')
  22. for i in range(len(n7_file[0])):
  23. if type(n7_file[0][i]) is str:
  24. n7_file.loc[i,0] = float('.'.join(n7_file.loc[i,0].split(',')))
  25. print("Data file: {}".format(PATH_TO_PRISMA_2_7d_DATA + n7_file_template))
  26. fix_end_time_series = n7_file[0].lt(n7_file[0].shift())
  27. bad_end_time_index = fix_end_time_series[fix_end_time_series == True].index
  28. for index in range(len(n7_file.index)):
  29. params = list(n7_file.iloc[index])
  30. # if type(params[0]) is str:
  31. # params[0] = float('.'.join(params[0].split(',')))
  32. event_time = str(datetime.timedelta(seconds=params[0])) # перевод в utc-формат
  33. # event_date = (datetime.timedelta(seconds=params[0]) + datetime.timedelta(hours=3)).date()
  34. event_datetime = datetime.datetime(date.year, date.month, date.day, int(event_time.split(':')[0]),
  35. int(event_time.split(':')[1]), int(float(event_time.split(':')[2])),
  36. int(round(
  37. float(event_time.split(':')[2]) - int(float(event_time.split(':')[2])),
  38. 2) * 10 ** 6)) - datetime.timedelta(hours=4)
  39. if index >= bad_end_time_index:
  40. new_date = date + datetime.timedelta(days=1)
  41. event_datetime = datetime.datetime(new_date.year, new_date.month, new_date.day,
  42. int(event_time.split(':')[0]),
  43. int(event_time.split(':')[1]), int(float(event_time.split(':')[2])),
  44. int(round(
  45. float(event_time.split(':')[2]) - int(
  46. float(event_time.split(':')[2])),
  47. 2) * 10 ** 6)) - datetime.timedelta(hours=4)
  48. trigger = params[2]
  49. amp = [int(params[j]) for j in range(3, 19)]
  50. det_params = {}
  51. for i in range(1, 17):
  52. det_params[f'det_{i:02}'] = {
  53. 'amplitude': amp[i - 1]
  54. }
  55. try:
  56. new_record = {
  57. '_id': f'{event_datetime.date()}_{cluster:02}_07d_{int(event_datetime.hour):02}:' +
  58. f'{int(event_datetime.minute):02}:{int(event_datetime.second):02}.' +
  59. f'{str(event_datetime.microsecond)[:3]}.000.000',
  60. 'time_ns': int((int(event_datetime.hour) * 1440 + int(event_datetime.minute) * 60 + int(
  61. event_datetime.second)) * 10e8 + int(event_datetime.microsecond) * 1000),
  62. 'cluster': cluster,
  63. 'trigger': int(trigger),
  64. 'detectors': det_params
  65. }
  66. collection_prisma = prisma_db[f'{str(event_datetime.date())}_7d']
  67. ins_result = collection_prisma.insert_one(new_record)
  68. print(f'Copied - {ins_result.inserted_id}')
  69. except pymongo.errors.DuplicateKeyError:
  70. print(f'Ошибка - {event_datetime.date()}-{event_time}')
  71. # Press the green button in the gutter to run the script.
  72. if __name__ == '__main__':
  73. cluster_1 = 1
  74. cluster_2 = 2
  75. date_time_start = datetime.date(2021, 12, 1) # посмотреть почему не собирается конец дня 2018-04-22
  76. date_time_stop = datetime.date(2021, 12, 31)
  77. LIST_OF_DATES = [(date_time_start + datetime.timedelta(days=i)) for i in
  78. range((date_time_stop - date_time_start).days + 1)]
  79. for date in LIST_OF_DATES:
  80. try:
  81. prisma_7d_past_data_copier(date, cluster_1)
  82. except FileNotFoundError:
  83. print(f'файла {cluster_1}-го кластера от {date} не существует')
  84. try:
  85. prisma_7d_past_data_copier(date, cluster_2)
  86. except FileNotFoundError:
  87. print(f'файла {cluster_2}-го кластера от {date} не существует')
  88. print('test')