Data_deduplication_service
Service that will use hashes to deduplicate files
FileService.h
Go to the documentation of this file.
1 #ifndef DATA_DEDUPLICATION_SERVICE_FILESERVICE_H
2 #define DATA_DEDUPLICATION_SERVICE_FILESERVICE_H
3 
4 #include <vector>
5 #include <array>
6 #include <string>
7 #include <iostream>
8 
9 
10 #include "dbManager.h"
11 #include "fileUtils.h"
12 
14 namespace file_services {
15 
16  using namespace db_services;
17 
24  class FileService {
25  public:
27 
29  = default;
30 
40  template<dbUsageStrategy dbUsageStrategy = use>
41  int dbLoad(std::string_view dbName, std::string_view configurationFile = db_services::cfileName);
42 
50  template<dbUsageStrategy dbUsageStrategy = use>
51  int dbLoad(db_services::myConnString &cStr);
52 
53  int dbDrop(std::string_view dbName) {
54  auto res = manager_.dropDatabase(dbName);
55  return res;
56  };
57 
65  template<dataInsetionStrategy data_insertion_str = PreserveOld>
66  int processDirectory(std::string_view dirPath, size_t segmentSize, const hash_function& hash= SHA_256);
67 
78  template<dataInsetionStrategy data_insertion_str = PreserveOld, bool existence_checks = true>
79  int processFile(std::string_view filePath, size_t segmentSize,const hash_function& hash= SHA_256);
80 
85  int insertDirEntry(std::string_view dirPath);
86 
90  tl::expected<double,int> getCoefficient();
94  tl::expected<std::array<int, 5>, int> getDataD();
95 
103  template<rootDirectoryHandlingStrategy root_directory_str = NoCreateMain,
104  dataRetrievalStrategy retrievalStrategy = Persist>
105  int loadDirectory(std::string_view fromDir, std::string_view toDir);
106 
115  dataRetrievalStrategy retrievalStrategy = Persist, bool from_load_dir = false>
116  int loadFile(std::string_view fromFile, std::string_view toFile,
118 
123  int deleteFile(std::string_view filePath);
124 
129  int deleteDirectory(std::string_view dirPath);
130 
132  return manager_.checkConnection();
133  }
134 
135  int clearSegments();
136 
141  template<typename ResType1, typename ... Args>
142  tl::expected<ResType1, int> executeInTransaction(ResType1
143  (*call)(db_services::trasnactionType &, Args ...),
144  Args &&... args) {
145  return manager_.executeInTransaction(call, std::forward<Args>(args)...);
146  }
147 
153  template<typename ResType1, typename ... Args>
154  tl::expected<ResType1, int>
155  executeInTransaction(const std::function<ResType1(db_services::trasnactionType &, Args ...)> &call,
156  Args &&... args) {
157  return manager_.executeInTransaction(call, std::forward<Args>(args)...);
158  }
159 
160  void inline disconnect() {
161  manager_.disconnect();
162  }
163 
164  private:
165  dbManager manager_;
166  };
167 
168 
169  template<rootDirectoryHandlingStrategy dir_s, dataRetrievalStrategy rr, bool from_load_dir>
170  int FileService::loadFile(std::string_view fromFile, std::string_view toFile,
171  indexType fileId) {
172  fs::path toFilePath;
173  fs::path fromFilePath;
174  fs::path parentDirPath;
175  try {
176  toFilePath = getNormalAbs(toFile);
177  parentDirPath = toFilePath.parent_path();
178  fromFilePath = getNormalAbs(fromFile);
179 
180  if (!fs::exists(parentDirPath)) {
181  if constexpr (dir_s == CreateMain) {
182  fs::create_directories(parentDirPath);
183  VLOG(2)
184  << vformat("Root directory \"%s\" was created successfully",
185  parentDirPath.c_str());
186  } else {
187  VLOG(1)
188  << vformat("\"%s\" no such file or directory\n", parentDirPath.c_str());
190  }
191  }
192  if constexpr (dir_s == NoCreateMain) {
193 
194  if (!fs::is_directory(parentDirPath)) {
195  VLOG(1) << vformat("\"%s\" is not a directory use procesFile for files\n",
196  parentDirPath.c_str());
198  }
199  }
200 
201  }
202  catch (const fs::filesystem_error &e) {
203  VLOG(1) << vformat("Filesystem error : %s , error code %d\n", e.what(), e.code());
205  }
206  if (fs::is_directory(toFilePath)) {
207  VLOG(1) << vformat("Entry %s is not a file use processDirectory for directories\n", toFilePath.c_str());
209  }
210 
211  std::basic_ofstream<symbolType> out(toFilePath.c_str());
212 
213  auto streamRes = manager_.getFileStreamed(fromFilePath.string(), out, fileId);
214 
215  out.close();
216 
217  if (streamRes == returnCodes::ErrorOccured) {
218  VLOG(1) << vformat("Error occurred during "
219  "file \"%s\" streaming",
220  fromFilePath.c_str());
221  return streamRes;
222  }
223 
224  if constexpr (!from_load_dir && rr == dataRetrievalStrategy::Remove) {
225 
226  auto delRes = manager_.deleteFile(fromFilePath.string());
227 
228  if (delRes == returnCodes::ErrorOccured) {
229  VLOG(1) << vformat("Error occurred during "
230  "file \"%s\" deletion",
231  fromFilePath.c_str());
232  return delRes;
233  }
234  }
236  }
237 
238 
239  template<rootDirectoryHandlingStrategy dir_s, dataRetrievalStrategy rr>
240  int FileService::loadDirectory(std::string_view fromDir, std::string_view toDir) {
241  fs::path newDirPath;
242  fs::path fromDirPath = getNormalAbs(fromDir);
243 
244 
245  try {
246  if (!fs::exists(toDir)) {
247  if constexpr (dir_s == CreateMain) {
248  fs::create_directories(toDir);
249  VLOG(2)
250  << vformat("Root directory \"%s\" was created successfully", toDir.data());
251  } else {
252  VLOG(1) << vformat("\"%s\" no such file or directory\n", toDir.data());
254  }
255  }
256  if constexpr (dir_s == NoCreateMain) {
257 
258  if (!fs::is_directory(toDir)) {
259  VLOG(1) << vformat("\"%s\" is not a directory change to_dir path\n",
260  toDir.data());
262  }
263  }
264  newDirPath = getNormalAbs(toDir);
265  } catch (const fs::filesystem_error &e) {
266  VLOG(1) << vformat("Filesystem error : %s , error code %d\n", e.what(), e.code());
268  }
269 
270  if (!fs::is_directory(newDirPath)) {
271  VLOG(1) << vformat("Entry %s is not a directory use processFile for files\n", newDirPath.c_str());
273  }
274  auto files = manager_.getAllFiles(fromDirPath.string());
275  if (files.empty()) {
276  VLOG(1) << vformat("No files found for directory %s", fromDir.data());
277  }
278 
279 
280  for (const std::pair<db_services::indexType, std::string> &pair: files) {
281  std::string filePath = pair.second;
282 
283  auto newDirRealPath = newDirPath / fs::path(pair.second).lexically_relative(fromDirPath);
284 
285  auto result = this->template loadFile<rootDirectoryHandlingStrategy::CreateMain, rr, true>
286  (pair.second, newDirRealPath.string(), pair.first);
287  if (result == returnCodes::ErrorOccured) {
288  VLOG(1) << vformat("Error occurred during "
289  "file \"%s\" retrieval",
290  fromDirPath.c_str());
291  continue;
292  }
293  }
294 
295  if constexpr (rr == dataRetrievalStrategy::Remove) {
296  manager_.deleteDirectory(fromDirPath.string());
297  }
299  }
300 
301 
302  template<dbUsageStrategy str>
303  int FileService::dbLoad(std::string_view dbName, std::string_view configurationFile) {
304  auto cString = db_services::loadConfiguration(configurationFile);
305  cString.setDbname(dbName);
306  return dbLoad<str>(cString);
307  }
308 
309 
310  template<dbUsageStrategy str>
312  manager_ = dbManager(cStr);
313 
314  if constexpr (str == create) {
315 
316  auto reusult = manager_.createDatabase(cStr.getDbname());
317 
318  if (reusult == returnCodes::ErrorOccured) {
319  VLOG(1) << vformat("Error occurred during database \"%s\" creation\n", cStr.getDbname().data());
321  }
322 
323  reusult = manager_.fillSchemas();
324 
325  if (reusult == returnCodes::ErrorOccured) {
326  VLOG(1)
327  << vformat("Error occurred during database's \"%s\" schema's creation\n",
328  cStr.getDbname().data());
330  }
331  } else {
332 
333  auto res = manager_.connectToDb();
334  if (res == returnCodes::ErrorOccured) {
335  VLOG(1)
336  << vformat("Error occurred during database's \"%s\" schema's creation\n",
337  cStr.getDbname().data());
339  }
340  }
341  VLOG(2) << ((manager_.checkConnection()) ? "connection established\n" : "cannot connect\n");
342  return ReturnSucess;
343  }
344 
345 
346  template<dataInsetionStrategy strategy, bool existence_checks>
347  int FileService::processFile(std::string_view filePath, size_t segmentSize,const hash_function& hash) {
348  std::string file;
349  if constexpr (existence_checks) {
350  auto result = checkFileExistence(filePath);
351  if (!result.has_value()) {
353  }
354  file = result.value();
355  } else {
356  file = filePath;
357  }
358 
359  auto size = fs::file_size(file);
360 
361  gClk.tik();
362  auto fileId = manager_.createFile(file, size, segmentSize, hash);
363  gClk.tak();
364 
365  if (fileId == returnCodes::AlreadyExists) {
366  if (strategy == PreserveOld) {
368  }
369 
370  auto res = manager_.deleteFile(file, fileId);
371 
372  if (res == returnCodes::ErrorOccured) {
373  VLOG(1)
374  << vformat("Error occurred during insert/replace.\n File path \"%s\"!",
375  file.c_str());
377  }
378  fileId = manager_.createFile(file, size, segmentSize, hash);
379  }
380 
381  if (fileId == returnCodes::ErrorOccured) {
382  VLOG(1)
383  << vformat("Error occurred during file creation.\n File path \"%s\"!", file.c_str());
385  }
386  std::basic_ifstream<symbolType> in(file);
387 
388  gClk.tik();
389  auto res1 = manager_.insertFileFromStream(file, in, segmentSize, size,hash);
390  gClk.tak();
391 
392  if (res1 == returnCodes::ErrorOccured) {
393  VLOG(1)
394  << vformat("Error occurred during file contents streaming.\n File path \"%s\"!",
395  file.c_str());
396  return res1;
397  }
398  gClk.tik();
399  res1 = manager_.finishFileProcessing(file, fileId);
400  gClk.tak();
401 
402  if (res1 == returnCodes::ErrorOccured) {
403  VLOG(1)
404  << vformat("Error occurred during file contents processing.\n File path \"%s\"!",
405  file.c_str());
406  return res1;
407  }
409  }
410 
411 
412  template<dataInsetionStrategy strategy>
413  int FileService::processDirectory(std::string_view dirPath, size_t segmentSize, const hash_function& hash) {
414  fs::path pp;
415 
416  auto result = checkDirectoryExistence(dirPath);
417  if (!result.has_value()) {
418  return ErrorOccured;
419  }
420  pp = result.value();
421 
422  auto dd = fs::canonical(pp).string();
423 
424 
425  int res=ReturnSucess;
426 
427  auto res1=insertDirEntry(dd);
428  if(res1!=ReturnSucess)
429  {
430  res=res1;
431  }
432 
433  for (const auto &entry: fs::recursive_directory_iterator(pp)) {
434  if (!fs::is_directory(entry)) {
435  auto file = fs::canonical(entry.path()).string();
436  gClk.tik();
437  auto results = this->template processFile<strategy, false>(file, segmentSize, hash);
438  gClk.tak();
439 
440  if (results == AlreadyExists) {
441  res=AlreadyExists;
442  continue;
443  } else if (results == ErrorOccured) {
444  return results;
445  }
446  } else
447  {
448  auto dd = fs::canonical(entry.path()).string();
449  auto results=insertDirEntry(dd);
450  if (results == AlreadyExists) {
451  res=AlreadyExists;
452  }
453  }
454  }
455  return res;
456  }
457 }
458 
459 #endif //DATA_DEDUPLICATION_SERVICE_FILESERVICE_H
Database manager that handles database management.
Definition: dbManager.h:39
This class handles file and directory management and uses dbManager to perform the database calls.
Definition: FileService.h:24
void disconnect()
Definition: FileService.h:160
int processFile(std::string_view filePath, size_t segmentSize, const hash_function &hash=SHA_256)
Definition: FileService.h:347
tl::expected< ResType1, int > executeInTransaction(const std::function< ResType1(db_services::trasnactionType &, Args ...)> &call, Args &&... args)
Definition: FileService.h:155
db_services::indexType indexType
Definition: FileService.h:26
int loadDirectory(std::string_view fromDir, std::string_view toDir)
Definition: FileService.h:240
int loadFile(std::string_view fromFile, std::string_view toFile, indexType fileId=paramType::EmptyParameterValue)
Definition: FileService.h:170
int processDirectory(std::string_view dirPath, size_t segmentSize, const hash_function &hash=SHA_256)
Definition: FileService.h:413
int dbDrop(std::string_view dbName)
Definition: FileService.h:53
bool checkConnection()
Definition: FileService.h:131
int dbLoad(std::string_view dbName, std::string_view configurationFile=db_services::cfileName)
Definition: FileService.h:303
tl::expected< ResType1, int > executeInTransaction(ResType1(*call)(db_services::trasnactionType &, Args ...), Args &&... args)
Definition: FileService.h:142
void tak(const std::source_location &location=std::source_location::current())
Definition: clockArray.h:231
void tik(const std::source_location &location=std::source_location::current())
Definition: clockArray.h:256
std::unordered_map< int, fileLoad > files
Definition: common.cpp:16
db_services namespace
Definition: dbCommon.h:17
pqxx::transaction< pqxx::isolation_level::read_committed > trasnactionType
Definition: dbCommon.h:35
int64_t indexType
Definition: dbCommon.h:34
myConnString loadConfiguration(std::string_view filename)
Definition: dbCommon.cpp:106
file services namespace
Definition: FileService.h:14
rootDirectoryHandlingStrategy
Definition: fileUtils.h:55
@ NoCreateMain
will return an error code
Definition: fileUtils.h:57
@ CreateMain
will create this directory using create_directories
Definition: fileUtils.h:59
dataRetrievalStrategy
Definition: fileUtils.h:45
@ Remove
will delete requested data from database
Definition: fileUtils.h:49
@ Persist
will leave data as is
Definition: fileUtils.h:47
tl::expected< std::string, int > checkFileExistence(std::string_view filePath)
Definition: fileUtils.cpp:101
@ PreserveOld
will ignore files that already exist
Definition: fileUtils.h:37
tl::expected< std::string, int > checkDirectoryExistence(std::string_view dirPath)
Definition: fileUtils.cpp:123
@ create
create new database if it doesn't exist
Definition: fileUtils.h:29
fs::path getNormalAbs(const fs::path &path)
hash_function
Definition: HashUtils.h:28
@ SHA_256
Definition: HashUtils.h:30
@ ErrorOccured
Definition: myConcepts.h:50
@ AlreadyExists
Definition: myConcepts.h:49
@ ReturnSucess
Definition: myConcepts.h:51
std::string vformat(const char *zcFormat,...)
Definition: myConcepts.cpp:11
@ EmptyParameterValue
Definition: myConcepts.h:58
clockType gClk
Definition: myConcepts.cpp:9
Structure to store and format connection string.
Definition: myConnString.h:15
const std::string & getDbname() const
Definition: myConnString.h:78