Browse files

add class in noSQL_db_Prisma.py, make file_reader and db_file_reader

JustDreamer7 2 years ago
parent
commit
6d5ff3be8b
7 changed files with 465 additions and 10 deletions
  1. file_reader/__init__.py (+0 -0)
  2. file_reader/db_file_reader.py (+75 -0)
  3. file_reader/file_reader.py (+132 -0)
  4. noSQL_db_Prisma.py (+205 -0)
  5. prisma_12d_db_copier.py (+3 -4)
  6. prisma_7d_db_copier.py (+25 -6)
  7. runner.py (+25 -0)

+ 0 - 0
file_reader/__init__.py


+ 75 - 0
file_reader/db_file_reader.py

@@ -0,0 +1,75 @@
+import datetime
+from collections import defaultdict
+from file_reader.file_reader import FileReader
+
+import pandas as pd
+import pymongo
+
+
+class DbFileReader(FileReader):
+    # __DB_URL = "mongodb://localhost:27017/"
+
+    def __init__(self, cluster, single_date, db_url):
+        # Deliberately does not call FileReader.__init__, which would try to read
+        # n-/n7-files from disk; this reader pulls everything from MongoDB instead.
+        self.cluster = cluster
+        self.single_date = single_date
+        self.__db_url = db_url
+
+    def reading_db(self) -> pd.DataFrame:
+        """Read the PRISMA-32 NoSQL database for this cluster and date via DB_URL."""
+
+        # Normalize to a plain date so the collection name matches those created by
+        # NoSQLPrisma (e.g. '2021-11-01_12d'), whether single_date is a datetime.date
+        # or a pandas Timestamp.
+        collection_name = f'{pd.Timestamp(self.single_date).date()}_12d'
+        data_cl = pd.DataFrame.from_records(
+            pymongo.MongoClient(self.__db_url)["prisma-32_db"][collection_name].find(
+                {'cluster': self.cluster}))
+        if data_cl.empty:
+            raise FileNotFoundError
+        amp_dict = defaultdict(list)
+        n_dict = defaultdict(list)
+        for item in data_cl['detectors']:
+            for j in [f'det_{i:02}' for i in range(1, 17)]:
+                amp_dict[j].append(item[j]['amplitude'])
+                n_dict[j].append(item[j]['neutrons'])
+
+        for i in range(1, 17):
+            data_cl[f'amp{i}'] = amp_dict[f'det_{i:02}']
+            data_cl[f'n{i}'] = n_dict[f'det_{i:02}']
+        data_cl['time'] = [round(item / 1e9, 2) for item in data_cl['time_ns']]
+        # '_id' starts with the ISO date (YYYY-MM-DD); see NoSQLPrisma.dinods_data_copier
+        data_cl['Date'] = [datetime.date(int(item[0:4]), int(item[5:7]), int(item[8:10]))
+                           for item in data_cl['_id']]
+
+        return data_cl
+
+    def concat_n_data(self, concat_n_df):
+        data_cl = self.reading_db()
+        # noinspection PyUnresolvedReferences
+        concat_n_df = pd.concat([concat_n_df, data_cl[['Date', 'time', 'trigger'] + self._amp_n_cols]],
+                                ignore_index=True)
+        return concat_n_df
+
+
+def db_preparing_data(start_date, end_date, path_to_db):
+    concat_n_df_1 = pd.DataFrame(columns=['Date', 'time', 'trigger'] + DbFileReader._amp_n_cols)
+    concat_n_df_2 = pd.DataFrame(columns=['Date', 'time', 'trigger'] + DbFileReader._amp_n_cols)
+    for single_date in pd.date_range(start_date, end_date):
+        try:
+            db_file_reader_1 = DbFileReader(cluster=1, single_date=single_date, db_url=path_to_db)
+            concat_n_df_1 = db_file_reader_1.concat_n_data(concat_n_df=concat_n_df_1)
+        except FileNotFoundError:
+            print(
+                f"File n_{single_date.month:02}-" +
+                f"{single_date.day:02}.{single_date.year - 2000:02} does not exist")
+        try:
+            db_file_reader_2 = DbFileReader(cluster=2, single_date=single_date, db_url=path_to_db)
+            concat_n_df_2 = db_file_reader_2.concat_n_data(concat_n_df=concat_n_df_2)
+        except FileNotFoundError:
+            print(
+                f"File 2n_{single_date.month:02}-" +
+                f"{single_date.day:02}.{single_date.year - 2000:02} does not exist")
+
+    return concat_n_df_1, concat_n_df_2
+
+# if __name__ == '__main__':
+#     date_time_start = datetime.date(2021, 11, 1)  # check why the end of day 2018-04-22 is not collected
+#     date_time_stop = datetime.date(2021, 11, 1)
+#     LIST_OF_DATES = [(date_time_start + datetime.timedelta(days=i)) for i in
+#                      range((date_time_stop - date_time_start).days + 1)]
+#     for date in LIST_OF_DATES:
+#         print(DbFileReader(cluster=1, single_date=date, db_url="mongodb://localhost:27017/").reading_db())
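
A minimal usage sketch for the new reader (not part of the commit), assuming a local MongoDB instance whose prisma-32_db database has already been filled by the copiers below:

import datetime

from file_reader.db_file_reader import db_preparing_data

# Collect the 12-dynode events of both clusters over a short date range.
df_1, df_2 = db_preparing_data(start_date=datetime.date(2021, 11, 1),
                               end_date=datetime.date(2021, 11, 3),
                               path_to_db="mongodb://localhost:27017/")
print(df_1[['Date', 'time', 'trigger']].head())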

+ 132 - 0
file_reader/file_reader.py

@@ -0,0 +1,132 @@
+import datetime
+
+import pandas as pd
+
+
+class FileReader:
+    # Single leading underscore (instead of a name-mangled double underscore) so that
+    # subclasses such as DbFileReader and module-level helpers can reach the column list.
+    _amp_n_cols = []
+    for i in range(1, 17):
+        _amp_n_cols.append(f'amp{i}')
+        _amp_n_cols.append(f'n{i}')
+
+    def __init__(self, cluster, single_date, path_to_files='', path_to_files_7d=''):
+        self.cluster = cluster
+        if cluster == 1:
+            self.cluster_n = ''
+        else:
+            self.cluster_n = '2'
+        self.path_to_files = path_to_files
+        self.path_to_files_7d = path_to_files_7d
+        self.single_date = single_date
+        self.n_file_today, self.n_file_day_after = self._reading_n_file()
+        self.n7_file_today, self.n7_file_day_after = self._reading_n7_file()
+
+    def reading_file(self, file_type) -> pd.DataFrame:
+        # The regex separator requires the python parser engine.
+        file = pd.read_csv(
+            f'{self.path_to_files}\\{file_type}\\{self.cluster_n}n_{self.single_date.month:02}' +
+            f'-{self.single_date.day:02}.{self.single_date.year - 2000:02}',
+            sep=r'\s[-]*\s*', header=None, skipinitialspace=True, index_col=False, engine='python')
+        file.dropna(axis=1, how='all', inplace=True)
+        return file
+
+    def preparing_n_file(self):
+        n_file = self.reading_file(file_type='n')
+        n_file.columns = ['time', 'number', 'sum_n', 'trigger'] + FileReader._amp_n_cols
+        return n_file
+
+    def _reading_n_file(self):
+        """Read an n-file and return (today's dataframe, next day's dataframe);
+        raises FileNotFoundError if the file does not exist."""
+        n_file = pd.read_csv(
+            f'{self.path_to_files}\\{self.cluster_n}n_{self.single_date.month:02}' +
+            f'-{self.single_date.day:02}.{self.single_date.year - 2000:02}',
+            sep=' ', header=None, skipinitialspace=True, index_col=False,
+            names=['time', 'number', 'sum_n', 'trigger'] + FileReader._amp_n_cols)
+        n_file.dropna(axis=1, how='all', inplace=True)
+        # A large negative jump in the seconds-of-day column marks the point where
+        # the events roll over into the next calendar day.
+        time_difference = n_file['time'].diff()
+        bad_end_time_index = time_difference[time_difference < -10000].index
+        if len(bad_end_time_index) > 0:
+            n_file_today = n_file[n_file.index < bad_end_time_index[0]].copy()
+            n_file_day_after = n_file[n_file.index >= bad_end_time_index[0]].copy()
+            return n_file_today, n_file_day_after
+        return n_file, []
+
+    def _reading_n7_file(self):
+        n7_file = pd.read_csv(
+            f'{self.path_to_files_7d}\\{self.cluster_n}n7_{self.single_date.month:02}' +
+            f'-{self.single_date.day:02}.{self.single_date.year - 2000:02}',
+            sep=' ', header=None, skipinitialspace=True, index_col=False)
+        n7_file.dropna(axis=1, how='all', inplace=True)
+        # Some files use a comma as the decimal separator in the time column.
+        for i in range(len(n7_file[0])):
+            if type(n7_file[0][i]) is str:
+                n7_file.loc[i, 0] = float(n7_file.loc[i, 0].replace(',', '.'))
+        time_difference = n7_file[0].diff()
+        bad_end_time_index = time_difference[time_difference < -10000].index
+        if len(bad_end_time_index) > 0:
+            n7_file_today = n7_file[n7_file.index < bad_end_time_index[0]].copy()
+            n7_file_day_after = n7_file[n7_file.index >= bad_end_time_index[0]].copy()
+            return n7_file_today, n7_file_day_after
+        return n7_file, []
+
+    @staticmethod
+    def concat_n_data(cls_object, concat_n_df):
+        cls_object.n_file_today['Date'] = [cls_object.single_date.date()] * len(cls_object.n_file_today.index)
+        concat_n_df = pd.concat([concat_n_df, cls_object.n_file_today],
+                                ignore_index=True)
+        if len(cls_object.n_file_day_after) > 0:
+            cls_object.n_file_day_after['Date'] = [(cls_object.single_date + datetime.timedelta(
+                days=1)).date()] * len(cls_object.n_file_day_after.index)
+            concat_n_df = pd.concat([concat_n_df, cls_object.n_file_day_after],
+                                    ignore_index=True)
+        return concat_n_df
+
+    def reading_p_file(self):
+        """Метод, прочитывающий p-файлы, возвращающий датафрейм дня на выходе. Или возвращающий filenotfounderror, если
+        файла нет"""
+        try:
+            p_file = pd.read_csv(
+                f'{self.path_to_files}\\nv\\{self.cluster}p{self.single_date.date().month:02}' +
+                f'-{self.single_date.date().day:02}.{self.single_date.date().year - 2000:02}',
+                sep=r'\s[-]*\s*', header=None, skipinitialspace=True, engine='python')
+            p_file.dropna(axis=1, how='all', inplace=True)
+            corr_p_file = self.correcting_p_file(p_file)
+            return corr_p_file
+        except FileNotFoundError as error:
+            print(f"File {self.path_to_files}\\nv\\{self.cluster}p{self.single_date.date().month:02}-" +
+                  f"{self.single_date.date().day:02}.{self.single_date.date().year - 2000:02} does not exist")
+            return error.strerror
+
+    @staticmethod
+    def correcting_p_file(p_file):
+        """Метод, корректирующий старые файлы ПРИЗМА-32, возвращающий скорректированный датафрейм"""
+        p_file['time'] = p_file[0]
+        del p_file[0]
+        p_file = p_file.sort_values(by='time')
+        if len(p_file['time']) > len(p_file['time'].unique()):
+            """Данный костыль нужен для старых p-файлов ПРИЗМА-32(до 14-15 гг.), в которых индексы строк, 
+            по сути обозначающие 5 минут реального времени между ранами, могут повторяться. """
+            p_file.drop_duplicates(keep=False, inplace=True)
+            """После удаления полных дубликатов ищем повторяющиеся индексы. Сначала удаляем строки, 
+            состоящие полностью из нулей и точек (value = len(p_file.columns)), потом ищем множество 
+            дубликатов индексов и множество строк, почти полностью (value > 30) состоящих из нулей и точек. 
+            Берем пересечение этих двух множеств и удаляем находящиеся в пересечении строки"""
+            null_row = dict(p_file.isin([0, '.']).sum(axis=1))  # Проверяем на нули и точки
+            all_null_index = list(
+                {key: value for key, value in null_row.items() if value == len(p_file.columns)}.keys())
+            p_file.drop(index=all_null_index, inplace=True)
+
+            null_index = list(
+                {key: value for key, value in null_row.items() if value > len(p_file.columns) - 5}.keys())
+            same_index = dict(p_file['time'].duplicated(keep=False))
+            same_index_row = list({key: value for key, value in same_index.items() if value is True}.keys())
+            bad_index = list(set(null_index) & set(same_index_row))
+            p_file.drop(index=bad_index, inplace=True)
+            """Также может быть, что после фильтрации осталось больше строк, чем нужно, так как в старых 
+            p-файлах может быть больше индексов, чем минут в дне. Тогда оставляем только 288"""
+            if len(p_file.index) == 289:
+                p_file = p_file.head(288)
+        return p_file
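
A usage sketch for FileReader (not part of the commit), assuming the Windows layout that NoSQLPrisma passes in (n-files under D:\PRISMA20\P1, n7-files under D:\PRISMA20\P1\n7); the constructor raises FileNotFoundError when the day's files are missing:

import datetime

import pandas as pd

from file_reader.file_reader import FileReader

reader = FileReader(cluster=1, single_date=datetime.datetime(2021, 12, 1),
                    path_to_files='D:\\PRISMA20\\P1',
                    path_to_files_7d='D:\\PRISMA20\\P1\\n7')
# Stitch the day's events (plus any next-day tail) into one dataframe.
day_df = FileReader.concat_n_data(reader, concat_n_df=pd.DataFrame())
print(day_df[['Date', 'time', 'trigger']].head())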

+ 205 - 0
noSQL_db_Prisma.py

@@ -0,0 +1,205 @@
+import datetime
+
+import pandas as pd
+import pymongo
+
+from config_info.config import *
+from file_reader.file_reader import FileReader
+
+
+# noinspection DuplicatedCode
+class NoSQLPrisma:
+    __DB_URL = DB_URL
+    __db_client = pymongo.MongoClient(__DB_URL)
+    __prisma_db = __db_client["prisma-32_db"]
+
+    def __init__(self, cluster, single_date):
+        self.cluster = cluster
+        if self.cluster == 1:
+            self.cluster_n = ""
+        else:
+            self.cluster_n = '2'
+        self.single_date = single_date
+        self.__PATH_TO_PRISMA_N_DATA = f"D:\\PRISMA20\\P{self.cluster}"
+        self.__PATH_TO_PRISMA_7d_DATA = f"D:\\PRISMA20\\P{self.cluster}\\n7"
+        self.__PATH_TO_PRISMA_T_DATA = f"D:\\PRISMA20\\P{self.cluster}\\t\\"
+        self.file_reader = FileReader(cluster=self.cluster, single_date=self.single_date,
+                                      path_to_files=self.__PATH_TO_PRISMA_N_DATA,
+                                      path_to_files_7d=self.__PATH_TO_PRISMA_7d_DATA)
+
+    def t_file_converter(self, path_to_t_file):
+        """Converter for PRISMA t-files"""
+        file_name = (f'{path_to_t_file}{self.cluster_n}t_{self.single_date.month:02}'
+                     f'-{self.single_date.day:02}.{self.single_date.year - 2000:02}')
+        with open(file_name) as f:
+            raw_data = f.readlines()
+        raw_data = [line.rstrip() for line in raw_data]  # strip trailing newlines
+        # A t-file is a sequence of event blocks separated by '*#*' lines: a header
+        # line (time, number, sum_n, trigger) followed by per-step lines holding a
+        # step time and 16 per-detector neutron counts.
+        event_list = []
+        main_list = []
+        sep = 0
+        for i in range(len(raw_data)):
+            if raw_data[i] == '*#*':
+                main_list.append(raw_data[sep].split(' '))
+                event_list.append(raw_data[sep + 1:i])
+                sep = i + 1
+        unit_delay = []
+        for item in event_list:
+            delay_per_event = []
+            for line in item:
+                step = line.split(' ')
+                for i in range(1, 17):
+                    if int(step[i]) != 0:
+                        delay_per_event.append([round(int(step[0]) * (10 ** (-4)), 4), i, int(step[i])])
+            unit_delay.append(delay_per_event)
+        plural_data_list = []
+        for i in unit_delay:
+            time_list = []
+            detector_list = []
+            neut_quantity_list = []
+            for j in i:
+                time_list.append(j[0])
+                detector_list.append(j[1])
+                neut_quantity_list.append(j[2])
+            plural_data_list.append([time_list, detector_list, neut_quantity_list])
+        for i in range(len(main_list)):
+            main_list[i].extend(plural_data_list[i])
+        t_file_df = pd.DataFrame(main_list,
+                                 columns=['time', 'number', 'sum_n', 'trigger', 'time_delay', 'detectors',
+                                          'n_per_step'])
+        t_file_df = t_file_df.astype({"time": float, "number": int, "sum_n": int, "trigger": int})
+        return t_file_df
+
+    def dinods_data_copier(self, event_datetime, trigger, det_params, dinode):
+        try:
+            new_record = {
+                '_id': f'{event_datetime.date()}_{self.cluster:02}_{dinode:02}d_{int(event_datetime.hour):02}:' +
+                       f'{int(event_datetime.minute):02}:{int(event_datetime.second):02}.' +
+                       # zero-pad microseconds so the millisecond field is correct below 100 ms
+                       f'{event_datetime.microsecond:06}'[:3] + '.000.000',
+                'time_ns': int((int(event_datetime.hour) * 3600 + int(event_datetime.minute) * 60 + int(
+                    event_datetime.second)) * 1e9 + int(event_datetime.microsecond) * 1000),
+                'cluster': self.cluster,
+                'trigger': int(trigger),
+                'detectors': det_params
+            }
+            collection_prisma = NoSQLPrisma.__prisma_db[f'{str(event_datetime.date())}_{dinode}d']
+            ins_result = collection_prisma.insert_one(new_record)
+            print(f'Copied - {ins_result.inserted_id}')
+        except pymongo.errors.DuplicateKeyError:
+            print(f'Error - {event_datetime.date()}-{event_datetime.time()}')
+
+    def prisma_12d_past_data_copier(self):
+        t_file = self.t_file_converter(path_to_t_file=self.__PATH_TO_PRISMA_T_DATA)
+        n_file_today = self.file_reader.n_file_today.merge(t_file)
+        self.make_parameters_from_df_12_d(n_file_today, self.single_date)
+        if len(self.file_reader.n_file_day_after) > 0:
+            n_file_day_after = self.file_reader.n_file_day_after.merge(t_file)
+            self.make_parameters_from_df_12_d(n_file_day_after,
+                                              self.single_date + datetime.timedelta(
+                                                  days=1))
+
+    def prisma_7d_past_data_copier(self):
+        self.make_parameters_from_df_7_d(self.file_reader.n7_file_today, self.single_date)
+        if len(self.file_reader.n7_file_day_after) > 0:
+            self.make_parameters_from_df_7_d(self.file_reader.n7_file_day_after,
+                                             self.single_date + datetime.timedelta(days=1))
+
+    def make_parameters_from_df_12_d(self, df, date):
+        for index in range(len(df.index)):
+            params = list(df.iloc[index])
+            event_time = str(datetime.timedelta(seconds=params[0]))
+            event_datetime = datetime.datetime(date.year, date.month, date.day,
+                                               int(event_time.split(':')[0]),
+                                               int(event_time.split(':')[1]), int(float(event_time.split(':')[2])),
+                                               int(round(
+                                                   float(event_time.split(':')[2]) - int(
+                                                       float(event_time.split(':')[2])),
+                                                   2) * 10 ** 6)) - datetime.timedelta(hours=4)
+            trigger = params[3]
+            amp = [int(params[j]) for j in range(4, 36, 2)]
+            n = [int(params[j]) for j in range(5, 37, 2)]
+
+            n_time_delay = params[36]
+            detector = params[37]
+            n_in_step = params[38]
+
+            det_params = {}
+            for i in range(1, 17):
+                n_time_delay_by_det = []
+                detector_index = [ind for ind, v in enumerate(detector) if v == i]
+                for j in detector_index:
+                    n_time_delay_by_det.extend([n_time_delay[j]] * int(n_in_step[j]))
+                #  Empty lists remain in the DB when there are zero neutrons - should this be fixed?
+                det_params[f'det_{i:02}'] = {
+                    'amplitude': amp[i - 1],
+                    'neutrons': n[i - 1],
+                    'time_delay': n_time_delay_by_det
+                }
+            self.dinods_data_copier(event_datetime=event_datetime, trigger=trigger,
+                                    det_params=det_params, dinode=12)
+
+    def make_parameters_from_df_7_d(self, df, date):
+        for index in range(len(df.index)):
+            params = list(df.iloc[index])
+            event_time = str(datetime.timedelta(seconds=params[0]))  # seconds-of-day -> 'H:MM:SS.ffffff'
+            event_datetime = datetime.datetime(date.year, date.month, date.day,
+                                               int(event_time.split(':')[0]),
+                                               int(event_time.split(':')[1]), int(float(event_time.split(':')[2])),
+                                               int(round(
+                                                   float(event_time.split(':')[2]) - int(
+                                                       float(event_time.split(':')[2])),
+                                                   2) * 10 ** 6)) - datetime.timedelta(hours=4)
+            trigger = params[2]
+            amp = [int(params[j]) for j in range(3, 19)]
+
+            det_params = {}
+
+            for i in range(1, 17):
+                det_params[f'det_{i:02}'] = {
+                    'amplitude': amp[i - 1]
+                }
+            self.dinods_data_copier(event_datetime=event_datetime, trigger=trigger,
+                                    det_params=det_params, dinode=7)
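
For reference, a document written by NoSQLPrisma.dinods_data_copier has the following shape (the values here are illustrative, not real data):

{
    '_id': '2021-12-01_01_12d_12:34:56.789.000.000',
    'time_ns': 45296789000000,  # seconds-of-day * 1e9 + microseconds * 1e3
    'cluster': 1,
    'trigger': 3,
    'detectors': {
        'det_01': {'amplitude': 154, 'neutrons': 2, 'time_delay': [0.1234, 0.5678]},
        # ... det_02 through det_16
    }
}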

+ 3 - 4
prisma_12d_db_copier.py

@@ -79,14 +79,14 @@ def prisma_12d_past_data_copier(date, cluster):
                                            int(event_time.split(':')[1]), int(float(event_time.split(':')[2])),
                                            int(round(
                                                float(event_time.split(':')[2]) - int(float(event_time.split(':')[2])),
-                                               2) * 10 ** 6)) - datetime.timedelta(hours=3)
+                                               2) * 10 ** 6)) - datetime.timedelta(hours=4)
         if index >= bad_end_time_index:
             new_date = date + datetime.timedelta(days=1)
             event_datetime = datetime.datetime(new_date.year, new_date.month, new_date.day, int(event_time.split(':')[0]),
                                            int(event_time.split(':')[1]), int(float(event_time.split(':')[2])),
                                            int(round(
                                                float(event_time.split(':')[2]) - int(float(event_time.split(':')[2])),
-                                               2) * 10 ** 6)) - datetime.timedelta(hours=3)
+                                               2) * 10 ** 6)) - datetime.timedelta(hours=4)
         trigger = params[3]
         amp = [int(params[j]) for j in range(4, 36, 2)]
         n = [int(params[j]) for j in range(5, 37, 2)]
@@ -123,8 +123,7 @@ def prisma_12d_past_data_copier(date, cluster):
             ins_result = collection_prisma.insert_one(new_record)
             print(f'Copied - {ins_result.inserted_id}')
         except pymongo.errors.DuplicateKeyError:
-            pass
-            # print(f'Error - {event_datetime.date()}-{event_time}')
+            print(f'Error - {event_datetime.date()}-{event_time}')
 
 
 # Press the green button in the gutter to run the script.
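
For reference, a sketch (not part of the commit) of the seconds-of-day to datetime conversion used throughout the copiers, including the timezone shift this commit changes from 3 to 4 hours:

import datetime

seconds = 45296.789  # hypothetical event time within the day
event_time = str(datetime.timedelta(seconds=seconds))  # '12:34:56.789000'
h, m, s = event_time.split(':')
event_datetime = datetime.datetime(2021, 12, 1, int(h), int(m), int(float(s)),
                                   int(round(float(s) - int(float(s)), 2) * 10 ** 6))
event_datetime -= datetime.timedelta(hours=4)  # shift local time by the new offset
print(event_datetime)  # 2021-12-01 08:34:56.790000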

+ 25 - 6
prisma_7d_db_copier.py

@@ -1,7 +1,8 @@
-import pymongo
-import pandas as pd
 import datetime
 
+import pandas as pd
+import pymongo
+
 from config_info.config import *
 
 db_client = pymongo.MongoClient(DB_URL)
@@ -9,28 +10,46 @@ prisma_db = db_client["prisma-32_db"]
 
 
 def prisma_7d_past_data_copier(date, cluster):
-
     if cluster == 1:
         n7_file_template = f"n7_{date.month:02}-{date.day:02}.{date.year - 2000:02}"
         n7_file = pd.read_csv(PATH_TO_PRISMA_1_7d_DATA + n7_file_template, sep=' ', skipinitialspace=True, header=None)
         n7_file = n7_file.dropna(axis=1, how='all')
+        # Some files use a comma as the decimal separator in the time column.
+        for i in range(len(n7_file[0])):
+            if type(n7_file[0][i]) is str:
+                n7_file.loc[i, 0] = float(n7_file.loc[i, 0].replace(',', '.'))
         print("Data file: {}".format(PATH_TO_PRISMA_1_7d_DATA + n7_file_template))
+        fix_end_time_series = n7_file[0].lt(n7_file[0].shift())
+        bad_end_time_index = fix_end_time_series[fix_end_time_series].index
     else:
         n7_file_template = f"2n7_{date.month:02}-{date.day:02}.{date.year - 2000:02}"
         n7_file = pd.read_csv(PATH_TO_PRISMA_2_7d_DATA + n7_file_template, sep=' ', skipinitialspace=True, header=None)
         n7_file = n7_file.dropna(axis=1, how='all')
+        # Some files use a comma as the decimal separator in the time column.
+        for i in range(len(n7_file[0])):
+            if type(n7_file[0][i]) is str:
+                n7_file.loc[i, 0] = float(n7_file.loc[i, 0].replace(',', '.'))
         print("Data file: {}".format(PATH_TO_PRISMA_2_7d_DATA + n7_file_template))
+        fix_end_time_series = n7_file[0].lt(n7_file[0].shift())
+        bad_end_time_index = fix_end_time_series[fix_end_time_series].index
     for index in range(len(n7_file.index)):
         params = list(n7_file.iloc[index])
-        if type(params[0]) is str:
-            params[0] = float('.'.join(params[0].split(',')))
         event_time = str(datetime.timedelta(seconds=params[0]))  # seconds-of-day -> 'H:MM:SS.ffffff'
         # event_date = (datetime.timedelta(seconds=params[0]) + datetime.timedelta(hours=3)).date()
         event_datetime = datetime.datetime(date.year, date.month, date.day, int(event_time.split(':')[0]),
                                            int(event_time.split(':')[1]), int(float(event_time.split(':')[2])),
                                            int(round(
                                                float(event_time.split(':')[2]) - int(float(event_time.split(':')[2])),
-                                               2) * 10 ** 6)) - datetime.timedelta(hours=3)
+                                               2) * 10 ** 6)) - datetime.timedelta(hours=4)
+        if index >= bad_end_time_index:
+            new_date = date + datetime.timedelta(days=1)
+            event_datetime = datetime.datetime(new_date.year, new_date.month, new_date.day,
+                                               int(event_time.split(':')[0]),
+                                               int(event_time.split(':')[1]), int(float(event_time.split(':')[2])),
+                                               int(round(
+                                                   float(event_time.split(':')[2]) - int(
+                                                       float(event_time.split(':')[2])),
+                                                   2) * 10 ** 6)) - datetime.timedelta(hours=4)
         trigger = params[2]
         amp = [int(params[j]) for j in range(3, 19)]
 

+ 25 - 0
runner.py

@@ -0,0 +1,25 @@
+import datetime
+from noSQL_db_Prisma import NoSQLPrisma
+
+date_time_start = datetime.date(2021, 12, 1)  # check why the end of day 2018-04-22 is not collected
+date_time_stop = datetime.date(2021, 12, 31)
+LIST_OF_DATES = [(date_time_start + datetime.timedelta(days=i)) for i in
+                 range((date_time_stop - date_time_start).days + 1)]
+for date in LIST_OF_DATES:
+    try:
+        NoSQLPrisma(cluster=1, single_date=date).prisma_12d_past_data_copier()
+    except FileNotFoundError:
+        print(f'Cluster 1 n-file for {date} does not exist')
+    try:
+        NoSQLPrisma(cluster=2, single_date=date).prisma_12d_past_data_copier()
+    except FileNotFoundError:
+        print(f'Cluster 2 n-file for {date} does not exist')
+    try:
+        NoSQLPrisma(cluster=1, single_date=date).prisma_7d_past_data_copier()
+    except FileNotFoundError:
+        print(f'Cluster 1 n7-file for {date} does not exist')
+    try:
+        NoSQLPrisma(cluster=2, single_date=date).prisma_7d_past_data_copier()
+    except FileNotFoundError:
+        print(f'Cluster 2 n7-file for {date} does not exist')