瀏覽代碼

make reader n7 faster

acoustic925 2 年之前
父節點
當前提交
0f6183ece3
共有 4 個文件被更改,包括 37 次插入和 19 次删除
  1. + 1 - 1
      file_reader/db_file_reader.py
  2. + 14 - 6
      file_reader/file_reader.py
  3. + 8 - 6
      noSQL_db_Prisma.py
  4. + 14 - 6
      runner.py

+ 1 - 1
file_reader/db_file_reader.py

@@ -44,7 +44,7 @@ class DbFileReader(FileReader):
     def concat_n_data(self, concat_n_df):
         data_cl = self.reading_db
         # noinspection PyUnresolvedReferences
-        concat_n_df = pd.concat([concat_n_df, data_cl[['Date', 'time', 'trigger'] + DbFileReader.__amp_n_cols]],
+        concat_n_df = pd.concat([concat_n_df, data_cl[['Date', 'time', 'trigger'] + self.__class__.__amp_n_cols]],
                                 ignore_index=True)
         return concat_n_df
 

+ 14 - 6
file_reader/file_reader.py

@@ -2,6 +2,9 @@ import datetime
 import pandas as pd
 
 
+# from pathlib import Path
+
+
 class FileReader:
     __amp_n_cols = []
     for i in range(1, 17):
@@ -26,20 +29,22 @@ class FileReader:
         pass
 
     def making_file_path(self, file_type):
-        file_path = f'{self.path_to_files}\\{file_type}\\{self.cluster_n}{file_type}_{self.single_date.month:02}-{self.single_date.day:02}.{self.single_date.year - 2000:02} '
+        file_path = f'{self.path_to_files}\\{file_type}\\{self.cluster_n}{file_type}_{self.single_date.month:02}-{self.single_date.day:02}.{self.single_date.year - 2000:02}'
         return file_path
 
     def making_file_path_eas_p(self, file_directory, file_type):
-        file_path = f'{self.path_to_files}\\{file_directory}\\{self.cluster}{file_type}_{self.single_date.month:02}-{self.single_date.day:02}.{self.single_date.year - 2000:02}'
+        file_path = f'{self.path_to_files}/{file_directory}/{self.cluster}{file_type}_{self.single_date.month:02}-{self.single_date.day:02}.{self.single_date.year - 2000:02}'
         return file_path
 
     def reading_n_file(self):
         """Метод, прочитывающий n-файлы, возвращающий датафрейм дня на выходе. Или возвращающий filenotfounderror, если
                 файла нет"""
+        print(self.n_file_path)
         n_file = pd.read_csv(self.n_file_path,
                              sep=r'\s[-]*\s*', header=None, skipinitialspace=True, index_col=False, engine='python')
         n_file.dropna(axis=1, how='all', inplace=True)
-        n_file.columns = ['time', 'number', 'sum_n', 'trigger'] + FileReader.__amp_n_cols
+        n_file.columns = ['time', 'number', 'sum_n', 'trigger'] + self.__class__.__amp_n_cols
+        n_file = n_file[n_file['time'] < 86400]
         time_difference = n_file['time'].diff()
         bad_end_time_index = time_difference[time_difference < -10000].index
         if any(bad_end_time_index):
@@ -49,18 +54,20 @@ class FileReader:
         return n_file, []
 
     def reading_n7_file(self):
+        print(self.n7_file_path)
         n7_file = pd.read_csv(self.n7_file_path,
                               sep=r'\s[-]*\s*', header=None, skipinitialspace=True, index_col=False, engine='python')
         n7_file.dropna(axis=1, how='all', inplace=True)
-        for i in range(len(n7_file[0])):
-            if type(n7_file[0][i]) is str:
-                n7_file.loc[i, 0] = float('.'.join(n7_file.loc[i, 0].split(',')))
+        n7_file[0] = n7_file[0].apply(lambda x: str(x).replace(',', '.')) # add this rows to file-twink
+        n7_file = n7_file.astype({0: float})
+        n7_file = n7_file[n7_file[0] < 86400]
         time_difference = n7_file[0].diff()
         bad_end_time_index = time_difference[time_difference < -10000].index
         if any(bad_end_time_index):
             n7_file_today = n7_file[n7_file.index < bad_end_time_index[0]]
             n7_file_day_after = n7_file[n7_file.index >= bad_end_time_index[0]]
             return n7_file_today, n7_file_day_after
+
         return n7_file, []
 
     @staticmethod
@@ -113,6 +120,7 @@ class FileReader:
                                  columns=['time', 'number', 'sum_n', 'trigger', 'time_delay', 'detectors',
                                           'n_per_step'])
         t_file_df = t_file_df.astype({"time": float, "number": int, "sum_n": int, "trigger": int})
+        t_file_df = t_file_df[t_file_df["time"] < 86400]
         return t_file_df
 
     def reading_p_file(self):

+ 8 - 6
noSQL_db_Prisma.py

@@ -6,6 +6,9 @@ from config_info.config import *
 from file_reader.file_reader import FileReader
 
 
+# from pathlib import Path
+
+
 # noinspection DuplicatedCode
 class NoSQLPrisma:
     __DB_URL = DB_URL
@@ -16,10 +19,10 @@ class NoSQLPrisma:
         self.cluster = cluster
         self.single_date = single_date
         self.file_reader = FileReader(cluster=self.cluster, single_date=self.single_date,
-                                      path_to_files=f"D:\\PRISMA20\\P{self.cluster}")
+                                      path_to_files=f'z:\\PRISMA-32\\DataArchive\\P{self.cluster}\\data{self.single_date.year}')
 
-    def __del__(self):
-        pass
+    # def __del__(self):
+    #     pass
 
     def dinods_data_copier(self, event_datetime, trigger, det_params, dinode):
         try:
@@ -40,9 +43,8 @@ class NoSQLPrisma:
             print(f'Ошибка - {event_datetime.date()}-{event_datetime.time()}')
 
     def prisma_12d_past_data_copier(self):
-
-        t_file = self.file_reader.reading_t_file()
         n_file_today, n_file_day_after = self.file_reader.reading_n_file()
+        t_file = self.file_reader.reading_t_file()
         n_file_today = n_file_today.merge(t_file)
         self.make_parameters_from_df_12_d(n_file_today, self.single_date)
         if any(n_file_day_after):
@@ -115,4 +117,4 @@ class NoSQLPrisma:
                 }
             self.dinods_data_copier(event_datetime=event_datetime, trigger=trigger,
                                     det_params=det_params, dinode=7)
-            return None
+        return None

+ 14 - 6
runner.py

@@ -1,25 +1,33 @@
 import datetime
 from noSQL_db_Prisma import NoSQLPrisma
 
-date_time_start = datetime.date(2021, 10, 1)  # посмотреть почему не собирается конец дня 2018-04-22
-date_time_stop = datetime.date(2021, 10, 31)
+
+date_time_start = datetime.date(2022, 9, 14)  # посмотреть почему не собирается конец дня 2018-04-22
+# date_time_stop = datetime.date(2020, 10, 28)
+date_time_stop = datetime.date.today()
 LIST_OF_DATES = [(date_time_start + datetime.timedelta(days=i)) for i in
                  range((date_time_stop - date_time_start).days + 1)]
 for date in LIST_OF_DATES:
     try:
         NoSQLPrisma(cluster=1, single_date=date).prisma_12d_past_data_copier()
     except FileNotFoundError:
-        print(f'n-файла 1-го кластера от {date} не существует')
+        print('нет')
+        with open('files_not_found.txt', 'a+') as f:
+            f.write(f'n-файла 1-го кластера от {date} не существует\n')
     try:
         NoSQLPrisma(cluster=2, single_date=date).prisma_12d_past_data_copier()
     except FileNotFoundError:
-        print(f'n-файла 2-го кластера от {date} не существует')
+
+        with open('files_not_found.txt', 'a+') as f:
+            f.write(f'n-файла 2-го кластера от {date} не существует\n')
     try:
         NoSQLPrisma(cluster=1, single_date=date).prisma_7d_past_data_copier()
     except FileNotFoundError:
-        print(f'n7-файла 1-го кластера от {date} не существует')
+        with open('files_not_found.txt', 'a+') as f:
+            f.write(f'n7-файла 1-го кластера от {date} не существует\n')
     try:
         NoSQLPrisma(cluster=2, single_date=date).prisma_7d_past_data_copier()
     except FileNotFoundError:
-        print(f'n7-файла 2-го кластера от {date} не существует')
+        with open('files_not_found.txt', 'a+') as f:
+            f.write(f'n7-файла 2-го кластера от {date} не существует\n')
 print('test')