From 310fa9b66af936c3ec335d5712d59e6e1e6f744e Mon Sep 17 00:00:00 2001 From: vlw Date: Thu, 1 Jan 2026 00:46:25 +0100 Subject: [PATCH] refactor: streamlined config file structure and config value referencing (#1) This PR refactors the config file structure into a simpler format, as well as making it more capable at the same time. Config values are now accessed as properties instead of referenced on the parsed config JSON list directly. Config files are also loaded with the `-i` argument which can take any compatible JSON file, this replaces the previous `-a` (autorun) argument. Reviewed-on: https://codeberg.org/vlw/3rd/pulls/1 Co-authored-by: vlw Co-committed-by: vlw --- .example.config.json | 40 ++++----- .gitignore | 2 +- README.md | 82 +++++++----------- run.py | 23 +++--- src/Archive/Archive.py | 30 +++---- src/Archive/Filesystem.py | 6 +- src/Config.py | 169 +++++++++++++++++++++++++++++++++++--- src/Enums.py | 7 ++ src/Upload/Aws.py | 10 +-- 9 files changed, 239 insertions(+), 130 deletions(-) diff --git a/.example.config.json b/.example.config.json index e5e3d3b..f39a72f 100644 --- a/.example.config.json +++ b/.example.config.json @@ -1,26 +1,16 @@ -{ - "config": { - "cloud": { - "bucket": "my-bucket" - }, - "archive": { - "password": "mypassword", - "compression_level": 10, - "default_path_temp": "/tmp/" - } +[ + { + "password": "mypassword", + "compression": 10, + "abspath_temp": null, + "abspath_target": "", + "abspath_destination": "s3:///" }, - "archive": [ - { - "compress": true, - "path_temp": null, - "path_target_to": "", - "path_target_from": "" - }, - { - "compress": true, - "path_temp": null, - "path_target_to": "", - "path_target_from": "" - } - ] -} + { + "password": "mypassword", + "compression": 10, + "abspath_temp": null, + "abspath_target": "", + "abspath_destination": "s3:///" + } +] diff --git a/.gitignore b/.gitignore index b17b62f..974fcfd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ -.config.json +.example.config.json 
__pycache__ diff --git a/README.md b/README.md index 224c247..e0b3de8 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,14 @@ # 3rd -A script to automate the "off-site copy" in the 3-2-1 Backup strategy with encryption, uploading to AWS S3, and independent definition of compression method and [temporary] archive storage locations for uploading large archives to S3, with support for independent configurations for subdirectories as well. +A script to automate the 3rd "off-site copy" step in the 3-2-1 Backup strategy. Each directory have independent configuration of compression level, encryption password, AWS S3 destination, and temporary storage location while being uploaded to S3. This script is a wrapper for the AWS CLI `aws` and the 7zip CLI `7z` and is meant to be run on Linux. Other operating systems are untested. -## Key features -- Archive encryption before uploading to AWS S3. -- Independent compression level, archive location, S3 storage location, for directories and subdirectories. - # Installation Make sure you have the following prerequisites before starting: -- Python 3 installed. -- The [7zip CLI](https://www.7-zip.org/download.html) installed. -- The [AWS CLI](https://aws.amazon.com/cli/) installed and configured with write access to your target bucket. +- Python 3 +- The [7zip CLI](https://www.7-zip.org/download.html) +- The [AWS CLI](https://aws.amazon.com/cli/) +- Write permission to an AWS S3 bucket 1. **Clone this repository** @@ -30,10 +27,10 @@ cp -p .example.config.json .config.json [See the config file documentation for more information](#config). -4. **Run `run.py` in autorun mode** +4. **Run `run.py` with your config file** ``` -python3 run.py -a +python3 run.py -i .config.json ``` [See the CLI section for a list of all available arguments](#cli). 
@@ -41,7 +38,7 @@ python3 run.py -a ## Optional cron Schedule this backup script to run with a crontab entry, for example: ``` -30 2 * * 3 cd /opt/3rd && /usr/bin/python3 run.py -a +30 2 * * 3 cd /opt/3rd && /usr/bin/python3 run.py -i .config.json ``` Which will run at 2:30 each Wednesday. @@ -49,41 +46,16 @@ Which will run at 2:30 each Wednesday. The config file `.config.json` is used to define parameters and which directories to archive (in autorun mode). ```json -{ - "config": { - "cloud": { - // Name of the target AWS S3 bucket - "bucket": "vlw-test" - // .. More options to come (probably) - }, - // Default settings for each archive item - "archive": { - // The password used to encrypt all archives - "password": "mypassword", - // The compression level to use when "compress" is true for an item - "compression_level": 10, - // Default archive location when "path_temp" is null for an item - "default_path_temp": "/tmp/output" - } +[ + { + "password": "mypassword", // AES-256 encryption password. Set to false to disable encryption + "compression": 10, // Compression level between 0-10, where 0 is STORE and 10 is max compression. Set to 0 or false/null to disable compression + "abspath_temp": "/tmp", // Directory to store the created archive while it's being uploaded to S3. Set to false/null to use the system temp-directory + "abspath_target": "", // An ABSOLUTE path to the directory or file to archive + "abspath_destination": "s3:///" // A fully qualified AWS S3 URL }, - // Array of archive items, see next section - "archive": [] -} - -``` - -Each archive item uses the following structure: -```json -{ - // Enables or disables compression for this directory. STORE will be used if disabled. - "compress": true, - // Store the encrypted archive in this directory temporarily while its being uploaded to S3. 
- "path_temp": "/tmp/", - // The relative path from the bucket root directory to store the uploaded object - "path_target_to": "/myarchive.7z", - // An absolute path (very important) to the target folder to upload - "path_target_from": "/my/archive" -} + // etc.. +] ``` ## Common parent directories @@ -92,19 +64,21 @@ One of the key features of this script is that it can perform different archivin If you have the directory `/my/archive` with the following config: ```json { - "compress": true, - "path_temp": null, - "path_target_to": "/myarchive.7z", - "path_target_from": "/my/archive" + "password": "mypassword", + "compression": 10, + "abspath_temp": null, + "abspath_target": "/my/archive", + "abspath_destination": "s3://my-bucket/archive.7z" } ``` And a subdirectory `/my/archive/subdirectory` with the following config: ```json { - "compress": true, - "path_temp": null, - "path_target_to": "/my-subdirectory.7z", - "path_target_from": "/my/archive/subdirectory" + "password": "mypassword", + "compression": 10, + "abspath_temp": null, + "abspath_target": "/my/archive/subdirectory", + "abspath_destination": "s3://my-bucket/subdirectory.7z" } ``` The `/my/archive/subdirectory` will be **excluded** from the `/my/archive` archive since it has an overriding archive configuration. @@ -115,6 +89,6 @@ Available command line argument with `run.py`: arg|Name|Default|Description --|--|--|-- `-s`|`--sleep`|2|Set a global sleep duration between commands -`-a`|`--autorun`|False|Archive each item in the .config.json archive array +`-i`|`--input`|*None*|Path to a config file to load `-d`|`--dryrun`|False|Perform a dry run. Archives will not be uploaded to S3. `-l`|`--log-level`|`StdoutLevel.STANDARD`|Set a custom log level when printing to the console. 
See `/src/Enums.py#StdoutLevel`
\ No newline at end of file
diff --git a/run.py b/run.py
index cc0eb57..95a80d2 100644
--- a/run.py
+++ b/run.py
@@ -9,21 +9,18 @@ from src.Enums import StdoutLevel, Namespace
 
 stdout = Stdout(Namespace.CLI)
 
-def autorun() -> None:
+def main() -> None:
     """
-    Autorun
+    Parse CLI arguments, then archive and upload
+    each item from the config file given with -i
+
+    Invoked from the __main__ guard below
     """
 
-    for item in Config().config["archive"]:
-        stdout.info(f"Autorun: {item}")
-
-        Aws(Archive(item)).upload()
-
-if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Testing")
 
     parser.add_argument("-s", "--sleep", type=int, help="Global log sleep level")
-    parser.add_argument("-a", "--autorun", action="store_true", help="Autorun")
+    parser.add_argument("-i", "--input", type=str, help="Load config file from path")
     parser.add_argument("-d", "--dryrun", action="store_true", help="Dry run")
     parser.add_argument("-l", "--log-level", type=str, help="Global log level")
 
@@ -49,8 +46,10 @@ if __name__ == "__main__":
 
     stdout.log("Starting...")
 
-    # Autorun archives from config
-    if args.autorun:
-        autorun()
+    for item in Config.from_json_file(args.input):
+        Aws(Archive(item)).upload()
 
     stdout.log("Finished!")
+
+if __name__ == "__main__":
+    main()
diff --git a/src/Archive/Archive.py b/src/Archive/Archive.py
index 0c18cfd..406c114 100644
--- a/src/Archive/Archive.py
+++ b/src/Archive/Archive.py
@@ -10,17 +10,17 @@ from .Filesystem import PATH_MANIFEST, Filesystem
 from ..Enums import Namespace, Format, StdoutLevel
 
 class Archive():
-    def __init__(self, item: dict):
+    def __init__(self, item: Config):
         """
         Create a new Archive instance for a target item
 
         Args:
-            item (dict): A dictionary of archive instructions
+            item (Config): Target item to archive
         """
 
         self.item = item
-        self.__fs = Filesystem(self.item["path_target_from"])
-        self.__config = Config().config["config"]
+
+        self.__fs = Filesystem(self.item.abspath_target)
         self.__stdout = Stdout(Namespace.ARCHIVE)
 
         if self.__fs.valid:
@@ -37,13 +37,9 @@ class Archive():
             str: Absolute pathname to target zip file
         """
 
-        output_path = self.__config["archive"]["default_path_temp"]
+        filename = hashlib.md5(self.item.abspath_target.encode()).hexdigest()
 
-        # Override temporary file location if specified
-        if self.item["path_temp"]:
-            output_path = self.item["path_temp"]
-
-        return f"{output_path.rstrip('/')}/{hashlib.md5(self.item['path_target_from'].encode()).hexdigest()}.7z"
+        return f"{self.item.abspath_temp.rstrip('/')}/{filename}.7z"
 
     def cleanup(self) -> None:
         """
@@ -58,31 +54,31 @@
         Skip archiving of target item
         """
 
-        self.__stdout.warn(f"Archiving skipped for: {self.item['path_target_from']}")
+        self.__stdout.warn(f"Archiving skipped for: {self.item.abspath_target}")
 
     def __compress(self) -> None:
         """
         Compress the target path
         """
 
-        self.__stdout.log(f"Starting compression for: {self.item['path_target_from']}").sleep()
+        self.__stdout.log(f"Starting compression for: {self.item.abspath_target}").sleep()
 
         # Prepare command line arguments
         args = [
             "7z", "a", "-t7z",
-            f"-mx={self.__config['archive']['compression_level']}"
+            f"-mx={self.item.compression}"
         ]
 
         # Enable encryption if archive password is set
-        if self.__config["archive"]["password"]:
+        if self.item.password:
             args.append("-mhe=on")
-            args.append(f"-p{self.__config['archive']['password']}")
+            args.append(f"-p{self.item.password}")
 
         # Append output path and file list manifest arguments for 7zip
         args.append(self.output_path)
-        args.append(self.item["path_target_from"])
+        args.append(self.item.abspath_target)
 
         # Exclude directories thats
         for exclude in self.__fs.common_relative_paths():
@@ -96,6 +92,6 @@
             return self.__die()
 
         self.__stdout.info(f"Temporary archive placed at: {self.__fs.path}").sleep()
-        self.__stdout.ok(f"Compression completed for: {self.item['path_target_from']}")
+        self.__stdout.ok(f"Compression completed for: {self.item.abspath_target}")
 
         cmd.cleanup()
diff --git a/src/Archive/Filesystem.py b/src/Archive/Filesystem.py
index 8148de1..440829a 100644
--- a/src/Archive/Filesystem.py
+++ b/src/Archive/Filesystem.py
@@ -1,13 +1,10 @@
 import os
-import tempfile
 from typing import Union
 
 from ..Config import Config
 from ..Stdout import Stdout
 from ..Enums import Namespace
 
-PATH_MANIFEST = f"{tempfile.gettempdir().rstrip('/')}/archive_manifest.txt"
-
 class Filesystem():
     def __init__(self, path: str):
         """
@@ -19,7 +16,6 @@
 
         self.valid = True
         self.path = path
-        self.__config = Config().config
         self.__stdout = Stdout(Namespace.FILESYSTEM)
 
         if not os.path.exists(self.path):
@@ -37,7 +33,7 @@
 
         paths = []
 
-        for item in self.__config["archive"]:
-            paths.append(item["path_target_from"])
+        for item in Config.pathnames:
+            paths.append(item)
 
         return paths
diff --git a/src/Config.py b/src/Config.py
index 13c57de..464be0d 100644
--- a/src/Config.py
+++ b/src/Config.py
@@ -1,25 +1,174 @@
 import json
-import typing
+import tempfile
 from pathlib import Path
+from typing import Self, Union
 
-CONFIG_FILEPATH = Path.cwd() / ".config.json"
+from .Enums import ConfigKeys
 
 class Config():
-    def __init__(self):
+    pathnames = set()
+
+    @staticmethod
+    def for_each(items: list) -> Self:
+        """
+        Returns a generator which iterates over each item in a list of item configs
+
+        Args:
+            items (list): The list to iterate over
+
+        Returns:
+            Self: Instance of the Config class
+
+        Yields:
+            Iterator[Self]: Config class for the current item
+        """
+
+        for item in items:
+            yield Config(item)
+
+    @staticmethod
+    def from_json_file(pathname: str) -> Self:
+        """
+        Load item configs from a JSON file
+
+        Args:
+            pathname (str): Path to the JSON config file to load
+
+        Returns:
+            Self: Generator yielding a Config instance per item
+        """
+
+        with open(pathname, "r") as f:
+            items = json.load(f)
+
+        for item in items:
+            Config.pathnames.add(item[ConfigKeys.ABSPATH_TARGET.value])
+
+        return Config.for_each(items)
+
+    @staticmethod
+    def __throw_missing_key(key: ConfigKeys) -> None:
+        """
+        Raises a KeyError for an item config key if it does not exist
+
+        Args:
+            key (ConfigKeys): The key to raise an error for
+
+        Raises:
+            KeyError: Raised from an item config key
+        """
+
+        raise KeyError(f"Expected required item config key '{key.value}' but it was not found")
+
+    @staticmethod
+    def __throw_value_error(key: ConfigKeys, expected_type: str) -> None:
+        """
+        Raise a ValueError for a key with an expected type
+
+        Args:
+            key (ConfigKeys): The item config key to raise an error for
+            expected_type (str): The expected type
+
+        Raises:
+            ValueError: Raised from an item config key and expected value type
+        """
+
+        raise ValueError(f"Item config key '{key.value}' expects a value of type {expected_type}")
+
+    def __init__(self, item: dict):
         """
         Create a new Config instance
         """
 
-        with open(CONFIG_FILEPATH, "r") as f:
-            self.__config = json.load(f)
+        self.__item = item
 
     @property
-    def config(self) -> dict:
+    def password(self) -> str | None:
        """
-        Returns config variables as a dictonary
+        Returns the password for this item, or None if unset
 
         Returns:
-            dict: Confg values
+            str | None: Password, or None if no password is set
         """
-
-        return dict(self.__config)
+
+        if not self.__key_exists(ConfigKeys.PASSWORD.value):
+            return None
+
+        return self.__item[ConfigKeys.PASSWORD.value] if isinstance(self.__item[ConfigKeys.PASSWORD.value], str) else None
+
+    @property
+    def compression(self) -> int:
+        """
+        Returns the compression level for this item, or 0 if STORE mode should be used
+
+        Returns:
+            int: Compression level for this item, 0 if compression is disabled
+        """
+
+        if not self.__key_exists(ConfigKeys.COMPRESSION.value):
+            return 0
+
+        if not isinstance(self.__item[ConfigKeys.COMPRESSION.value], int) or self.__item[ConfigKeys.COMPRESSION.value] == 0:
+            return 0
+
+        return max(1, min(self.__item[ConfigKeys.COMPRESSION.value], 10))
+
+    @property
+    def abspath_temp(self) -> str:
+        """
+        Returns the path to the directory where the created archive will be stored until it's uploaded
+
+        Returns:
+            str: Absolute path to the destination directory
+        """
+
+        if not self.__key_exists(ConfigKeys.ABSPATH_TEMP.value):
+            return tempfile.gettempdir()
+
+        return self.__item[ConfigKeys.ABSPATH_TEMP.value] if isinstance(self.__item[ConfigKeys.ABSPATH_TEMP.value], str) else tempfile.gettempdir()
+
+    @property
+    def abspath_target(self) -> str:
+        """
+        Returns an absolute path to the target to be archived
+
+        Returns:
+            str: Absolute path to the target
+        """
+
+        if not self.__key_exists(ConfigKeys.ABSPATH_TARGET.value):
+            return Config.__throw_missing_key(ConfigKeys.ABSPATH_TARGET)
+
+        if not isinstance(self.__item[ConfigKeys.ABSPATH_TARGET.value], str):
+            return Config.__throw_value_error(ConfigKeys.ABSPATH_TARGET, "str")
+
+        return self.__item[ConfigKeys.ABSPATH_TARGET.value]
+
+    @property
+    def abspath_destination(self) -> str:
+        """
+        Returns the fully qualified AWS S3 destination URL for this item
+
+        Returns:
+            str: Fully qualified S3 URL to upload the archive to
+        """
+
+        if not self.__key_exists(ConfigKeys.ABSPATH_DESTINATION.value):
+            return Config.__throw_missing_key(ConfigKeys.ABSPATH_DESTINATION)
+
+        if not isinstance(self.__item[ConfigKeys.ABSPATH_DESTINATION.value], str):
+            return Config.__throw_value_error(ConfigKeys.ABSPATH_DESTINATION, "str")
+
+        return self.__item[ConfigKeys.ABSPATH_DESTINATION.value]
+
+    def __key_exists(self, key: str) -> bool:
+        """
+        Returns true if a property key is defined for the current item
+
+        Args:
+            key (str): The key to test
+
+        Returns:
+            bool: True if key exists
+        """
+        return key in self.__item
diff --git a/src/Enums.py b/src/Enums.py
index ae9b1d8..3b23be4 100644
--- a/src/Enums.py
+++ b/src/Enums.py
@@ -1,5 +1,12 @@
 from enum import Enum
 
+class ConfigKeys(Enum):
+    PASSWORD = "password"
+    COMPRESSION = "compression"
+    ABSPATH_TEMP = "abspath_temp"
+    ABSPATH_TARGET = "abspath_target"
+    ABSPATH_DESTINATION = "abspath_destination"
+
 class Namespace(Enum):
     AWS = "AWS"
     CLI = "Command"
diff --git a/src/Upload/Aws.py b/src/Upload/Aws.py
index 297f300..f56997c 100644
---
a/src/Upload/Aws.py +++ b/src/Upload/Aws.py @@ -1,7 +1,6 @@ import typing from ..Cli import Cli -from ..Config import Config from ..Stdout import Stdout from ..Enums import Namespace, StdoutLevel from ..Archive.Archive import Archive @@ -18,7 +17,6 @@ class Aws(): """ self.archive = archive - self.__config = Config().config self.__stdout = Stdout(Namespace.AWS) def upload(self) -> None: @@ -26,7 +24,7 @@ class Aws(): Create a backup of an Archive instance to AWS """ - self.__stdout.log(f"Starting upload of archive for: {self.archive.item['path_target_from']}") + self.__stdout.log(f"Starting upload of archive for: {self.archive.item.abspath_target}") self.__stdout.debug(f"Archive object: {self.archive}") args = [ @@ -34,7 +32,7 @@ class Aws(): "s3", "cp", self.archive.output_path, - f"s3://{self.__config['config']['cloud']['bucket']}/{self.archive.item['path_target_to'].strip('/')}" + self.archive.item.abspath_destination ] if Aws.dry_run: @@ -47,7 +45,7 @@ class Aws(): cmd.run(args) if cmd.stderr: - self.__stdout.error(f"Failed to upload archive for: {self.archive.item['path_target_from']}") + self.__stdout.error(f"Failed to upload archive for: {self.archive.item.abspath_target}") return self.__stdout.info("Cleaning up temporary files") @@ -55,4 +53,4 @@ class Aws(): cmd.cleanup() self.archive.cleanup() - self.__stdout.ok(f"Archive uploaded: {self.archive.item['path_target_from']}") + self.__stdout.ok(f"Archive uploaded: {self.archive.item.abspath_target}")