diff --git a/.env.example b/.env.example
index 9ba18a7..2149a50 100644
--- a/.env.example
+++ b/.env.example
@@ -1,7 +1,15 @@
-SOURCE_FOLDER=
-TARGET_BUCKET=
+# (Required) Absolute path to the local folder to back up
+SOURCE_FOLDER=""
+# (Required) Name of the remote bucket or container
+TARGET_BUCKET=""
 
-# Cloud provider "gcs, aws, azure"
-SERVICE_NAME=
-# Path to service account key file
-SERVICE_KEY=
\ No newline at end of file
+# (Required) Cloud provider (gcloud, aws, azure)
+SERVICE_NAME=""
+# (Required) Cloud provider access string or path to key file
+SERVICE_KEY=""
+
+# -----------------------------------------------------------
+
+# (Optional) Log file path and log level
+LOG_FILE=""
+LOG_LEVEL="WARNING"
\ No newline at end of file
diff --git a/README.md b/README.md
index 4dfdfd4..39a4240 100644
--- a/README.md
+++ b/README.md
@@ -1,43 +1,87 @@
 # Cloud Backup
 
 Backup and archive ordinary files and folders to Google Cloud, AWS or Azure.
 
+## Why this exists in a world with Rclone (and similar)
+
+This script was created to solve one specific task: archiving a folder to the cloud without read access to the bucket/container and its objects.
+Cloud Backup keeps an internal database of the second-level files and folders that were successfully uploaded to the cloud.
+
+If a file or folder changes on disk, that specific file or folder is compressed, uploaded to the cloud, and the database is updated. What happens to the object in the bucket is invisible to the program.
+
 ## Get started
 
 This program requires Python 3.6 or newer with PIP.
+Cloud Backup supports uploading to Google Cloud Storage, Azure Blob Storage and AWS S3.
+
 1. **Clone this repo**
-```
+```bash
 git clone https://github.com/VictorWesterlund/cloud-backup
 ```
 
 2. **Install dependencies**
-```
+```bash
 python3 -m pip install -r requirements.txt
 ```
 
 3. **Copy environment variables file**
-```
+```bash
 cp .env.example .env
 ```
 
-4. **Edit environment variables**
-Open `.env` with your text editor of choice and fill out these required variables
+4. **Edit environment variables in `.env`**
 ```bash
-# Path to the local folder to back up
-SOURCE_FOLDER=
-# Name of the remote bucket (destination)
-TARGET_BUCKET=
+# Remember to escape backslashes in Windows paths: 'E:\\path\\to\\something'
+
-# Cloud provider (gcs, s3, azure)
-SERVICE_NAME=
-# Path to service account key file
-SERVICE_KEY=
+# Absolute path to the folder whose contents should be backed up
+SOURCE_FOLDER="/home/me/backup/"
+# Name of the bucket (or "container" in Azure)
+TARGET_BUCKET="home_backup"
+
+# Cloud provider: "gcloud", "aws" or "azure"
+SERVICE_NAME="aws"
+# IAM authentication
+# GCS: Path to keyfile or string (GOOGLE_APPLICATION_CREDENTIALS)
+# Azure: "Connection string" from the Access Key to the container
+# AWS: Access key ID and secret separated by a ";"
+SERVICE_KEY="SDJSBADYUAD;asid7sad123ebasdhasnk3dnsai"
 ```
 
 5. **Run backup script**
-```
+```bash
 python3 backup.py
 ```
 
 Second-level files and folders should now start uploading to your destination bucket as zip archives.
 
 Subsequent runs of the `backup.py` script will only upload changed files and folders. In fact, change state is cached locally, so checking for changes never contacts your cloud provider.
+
+----
+
+You can also run `backup.py` on a schedule with cron or the equivalent for your system.
+No requests will be sent to the cloud unless a file or folder has actually changed.
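+
+For example, a crontab entry like this runs a nightly backup at 02:00 (the repository path is a placeholder):
+
+```bash
+0 2 * * * cd /path/to/cloud-backup && python3 backup.py
+```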
+
+## More stuff
+
+Here are some additional settings and commands you can try:
+
+### Back up a second-level file
+```bash
+python3 backup.py file 'relative/path/from/.env'
+```
+
+### Resolve CRC32 to path or vice versa
+```bash
+python3 resolve.py '587374759'
+# output: 'hello_world.txt'
+
+python3 resolve.py 'hello_world.txt'
+# output: '587374759'
+```
+
+### Optional flags in `.env`
+```bash
+# The following integer-boolean flags can be added to .env to override default behavior.
+# The values shown here are the defaults.
+
+# Archive files and folders before uploading
+COMPRESS="1"
+```
diff --git a/backup.py b/backup.py
index 31763ab..d13a322 100644
--- a/backup.py
+++ b/backup.py
@@ -1,5 +1,31 @@
 import sys
 
-from src import Backup
+from src import Backup, file_exists
 
-Backup().backup_all()
\ No newline at end of file
+# Back up a single file by path
+def file_backup():
+    # Validate the argument count before indexing into sys.argv
+    if len(sys.argv) < 3:
+        return print("Invalid argument length: Expected path to file or folder")
+
+    path = sys.argv[2]
+
+    if not file_exists(path):
+        return print(f"File or folder at '{path}' does not exist")
+
+    return Backup().backup_item(path, False)
+
+def run(method):
+    methods = {
+        "file": file_backup,
+        # Defer construction of Backup() until a valid method is chosen
+        "all" : lambda: Backup().backup_all()
+    }
+
+    if method not in methods:
+        return print(f"Invalid argument '{method}'")
+
+    return methods[method]()
+
+if len(sys.argv) > 1:
+    run(sys.argv[1])
+else:
+    run("all")
\ No newline at end of file
diff --git a/install.sh b/install.sh
new file mode 100755
index 0000000..91d324e
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+install () {
+    python3 -m pip install "$1"
+}
+
+install python-dotenv
+
+# Install the Python library for the chosen cloud provider
+case $1 in
+    "gcloud")
+        install google-cloud-storage
+        ;;
+
+    "azure")
+        install azure-storage-blob
+        ;;
+
+    "aws")
+        install boto3
+        ;;
+
+    *) ;;
+esac
+
+# Create .env file if it doesn't exist
+if [ ! -f ".env" ]; then
+    cp .env.example .env
+    sed -i "s/SERVICE_NAME=\"\"/SERVICE_NAME=\"$1\"/" .env
+fi
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index dd9601f..a053fe2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,4 @@
 python-dotenv
-google-cloud-storage
\ No newline at end of file
+google-cloud-storage
+azure-storage-blob
+boto3
\ No newline at end of file
diff --git a/resolve.py b/resolve.py
new file mode 100644
index 0000000..35b5c7a
--- /dev/null
+++ b/resolve.py
@@ -0,0 +1,23 @@
+import sys
+
+from src import Database
+
+class Resolve(Database):
+    def __init__(self):
+        super().__init__()
+
+        # item_exists() expects a sequence whose first element is the path
+        if self.item_exists((sys.argv[1],)):
+            self.path_to_chksum()
+        else:
+            self.chksum_to_path()
+
+    # Print the checksum stored for a path
+    def path_to_chksum(self):
+        res = self.query("SELECT chksum FROM manifest WHERE anchor = ?", (sys.argv[1],))
+        print(res[0][0])
+
+    # Print the path stored for a checksum
+    def chksum_to_path(self):
+        res = self.query("SELECT anchor FROM manifest WHERE chksum = ?", (sys.argv[1],))
+        print(res[0][0] if res else f"No database entry for '{sys.argv[1]}'")
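+
+# Example session (values borrowed from the README; real checksums depend on your files):
+#   $ python3 resolve.py 'hello_world.txt'  ->  587374759
+#   $ python3 resolve.py '587374759'        ->  hello_world.txt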
+
+if len(sys.argv) > 1:
+    Resolve()
+else:
+    print("Invalid argument length: Expected a path or checksum to resolve")
\ No newline at end of file
diff --git a/src/__init__.py b/src/__init__.py
index efa8b99..1975223 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -1,9 +1,24 @@
+import os
 from dotenv import load_dotenv
+
 from .db import Database, dbname
 from .fs import FileSystem, file_exists
 from .backup import Backup
 
+# Required environment variables (LOG_FILE and LOG_LEVEL are optional)
+required_vars = (
+    "SOURCE_FOLDER",
+    "TARGET_BUCKET",
+    "SERVICE_NAME",
+    "SERVICE_KEY"
+)
+
 if not file_exists(".env"):
     raise FileNotFoundError("Environment variable file does not exist. Copy '.env.example' to '.env'")
 
-load_dotenv()
\ No newline at end of file
+load_dotenv()
+
+# Check that required environment variables are set
+if not all(map(os.getenv, required_vars)):
+    raise SystemExit("One or more required environment variables in '.env' have not been set")
\ No newline at end of file
diff --git a/src/backup.py b/src/backup.py
index 7f625f8..a071603 100644
--- a/src/backup.py
+++ b/src/backup.py
@@ -1,3 +1,6 @@
+import os
+import logging
+from logging.handlers import RotatingFileHandler
 from typing import Union
 
 from .cloud import Storage as StorageClient
@@ -7,6 +10,7 @@ from . import dbname
 class Backup(FileSystem):
     def __init__(self):
         super().__init__()
+        self.enable_logging()
 
         self.has_change = False
 
@@ -15,8 +19,39 @@
 
         self.compress = self.db.get_flag("COMPRESS")
 
+    # Configure logging
+    def enable_logging(self):
+        self.log = logging.getLogger(__name__)
+        # Handlers filter on their own levels, so let everything through the logger itself
+        self.log.setLevel(logging.DEBUG)
+
+        log_format = logging.Formatter("[%(asctime)s][%(levelname)s]: %(name)s: %(message)s")
+
+        # Log to console
+        log_console = logging.StreamHandler()
+        log_console.setLevel(logging.INFO)
+        log_console.setFormatter(log_format)
+
+        self.log.addHandler(log_console)
+        self.log.debug("Start console logging")
+
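+        # Example .env logging settings (illustrative values; LOG_LEVEL accepts
+        # any standard level name: DEBUG, INFO, WARNING, ERROR, CRITICAL):
+        #   LOG_FILE="/var/log/cloud-backup.log"
+        #   LOG_LEVEL="INFO"
+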
No changes found") \ No newline at end of file diff --git a/src/cloud/__init__.py b/src/cloud/__init__.py index dccc947..9090067 100644 --- a/src/cloud/__init__.py +++ b/src/cloud/__init__.py @@ -8,6 +8,8 @@ class Storage: self._service = None self.service = os.getenv("SERVICE_NAME") + self.error = None + @property def service(self): return self._service @@ -27,4 +29,7 @@ class Storage: return values def upload(self, *argv): - return self.service.upload(*argv) \ No newline at end of file + upload = self.service.upload(*argv) + self.error = self.service.error + + return upload diff --git a/src/cloud/aws.py b/src/cloud/aws.py new file mode 100644 index 0000000..66da6c9 --- /dev/null +++ b/src/cloud/aws.py @@ -0,0 +1,40 @@ +import os +import boto3 +from botocore.exceptions import ClientError + +from ..fs.utils import get_file + +class StorageClient: + def __init__(self): + self.set_access_key() + self.client = boto3.client("s3") + + self._error = None + + @property + def error(self): + return self._error + + @error.setter + def error(self, state): + self._error = state + + # Get IAM user access key and ID + def set_access_key(self): + key = os.getenv("SERVICE_KEY").split(";") + if len(key) != 2: + self.error = "Invalid AWS service key" + return False + + os.environ["aws_access_key_id"] = key[0] + os.environ["aws_secret_access_key"] = key[1] + + def upload(self, path: str) -> bool: + name = get_file(path) + + try: + resp = self.client.upload_file(path, os.getenv("TARGET_BUCKET"), name) + except ClientError as e: + self.error = e + return False + return True \ No newline at end of file diff --git a/src/cloud/azure.py b/src/cloud/azure.py new file mode 100644 index 0000000..6614297 --- /dev/null +++ b/src/cloud/azure.py @@ -0,0 +1,31 @@ +import os +from azure.storage.blob import BlobServiceClient + +from ..fs.utils import get_file + +class StorageClient: + def __init__(self): + self.client = BlobServiceClient.from_connection_string(os.getenv("SERVICE_KEY")) + + self._error = None + + @property + def error(self): + return self._error + + @error.setter + def error(self, state): + self._error = state + + def upload(self, path: str) -> bool: + name = get_file(path) + blob = self.client.get_blob_client(container=os.getenv("TARGET_BUCKET"), blob=name) + + try: + with open(path, "rb") as f: + blob.upload_blob(f,overwrite=True) + return True + except Exception as e: + if e.response.status_code == 403: + self.error = "Azure: Access key invalid or lacking required permissions" + return False \ No newline at end of file diff --git a/src/cloud/gcs.py b/src/cloud/gcloud.py similarity index 60% rename from src/cloud/gcs.py rename to src/cloud/gcloud.py index 94d2716..1f3e03c 100644 --- a/src/cloud/gcs.py +++ b/src/cloud/gcloud.py @@ -11,10 +11,21 @@ class StorageClient: client = storage.Client() self.bucket = client.bucket(self.get_bucket()) + self._error = None + + @property + def error(self): + return self._error + + @error.setter + def error(self, state): + self._error = state + def get_bucket(self): return os.getenv("TARGET_BUCKET") def upload(self, path: str) -> bool: + self.error = None name = get_file(path) blob = self.bucket.blob(name) @@ -22,5 +33,7 @@ class StorageClient: with open(path, "rb") as f: blob.upload_from_file(f) return True - except: + except Exception as e: + if e.response.status_code == 403: + self.error = "GCS: Forbidden: Account lacks 'storage.objects.create' role on target bucket" return False \ No newline at end of file diff --git a/src/db/database.py b/src/db/database.py index 
+        self.client = BlobServiceClient.from_connection_string(os.getenv("SERVICE_KEY"))
+
+        self._error = None
+
+    @property
+    def error(self):
+        return self._error
+
+    @error.setter
+    def error(self, state):
+        self._error = state
+
+    def upload(self, path: str) -> bool:
+        name = get_file(path)
+        blob = self.client.get_blob_client(container=os.getenv("TARGET_BUCKET"), blob=name)
+
+        try:
+            with open(path, "rb") as f:
+                blob.upload_blob(f, overwrite=True)
+            return True
+        except Exception as e:
+            # Only HTTP errors carry a status code; anything else stays a generic failure
+            if getattr(e, "status_code", None) == 403:
+                self.error = "Azure: Access key invalid or lacking required permissions"
+            return False
\ No newline at end of file
diff --git a/src/cloud/gcs.py b/src/cloud/gcloud.py
similarity index 60%
rename from src/cloud/gcs.py
rename to src/cloud/gcloud.py
index 94d2716..1f3e03c 100644
--- a/src/cloud/gcs.py
+++ b/src/cloud/gcloud.py
@@ -11,10 +11,21 @@
         client = storage.Client()
         self.bucket = client.bucket(self.get_bucket())
 
+        self._error = None
+
+    @property
+    def error(self):
+        return self._error
+
+    @error.setter
+    def error(self, state):
+        self._error = state
+
     def get_bucket(self):
         return os.getenv("TARGET_BUCKET")
 
     def upload(self, path: str) -> bool:
+        self.error = None
         name = get_file(path)
         blob = self.bucket.blob(name)
 
@@ -22,5 +33,7 @@
             with open(path, "rb") as f:
                 blob.upload_from_file(f)
             return True
-        except:
+        except Exception as e:
+            # google-api-core exceptions carry the HTTP status in 'code'
+            if getattr(e, "code", None) == 403:
+                self.error = "GCS: Forbidden: Account lacks the 'storage.objects.create' permission on the target bucket"
             return False
\ No newline at end of file
diff --git a/src/db/database.py b/src/db/database.py
index 8eebf39..a7ea58d 100644
--- a/src/db/database.py
+++ b/src/db/database.py
@@ -27,15 +27,18 @@ class Database(SQLite):
 
     # Check if item exists in the database
    def item_exists(self, item: Union[list, tuple]) -> bool:
-        sql = f"SELECT anchor FROM manifest WHERE anchor = '{item[0]}'"
-        res = self.query(sql)
+        # Parameterized query guards against SQL injection through file names
+        sql = "SELECT anchor FROM manifest WHERE anchor = ?"
+        res = self.query(sql, (item[0],))
 
         return res
 
     # Check if item should be backed up by comparing mtime and checksum
     def check_item(self, item: Union[list, tuple]) -> bool:
-        sql = f"SELECT {self.columns} FROM manifest WHERE anchor = '{item[0]}'"
-        db_item = self.query(sql)
+        # FORCE_UPLOAD in the environment bypasses change detection entirely
+        if os.getenv("FORCE_UPLOAD"):
+            return True
+
+        sql = f"SELECT {self.columns} FROM manifest WHERE anchor = ?"
+        db_item = self.query(sql, (item[0],))
 
         # New item or item changed, so back it up
         if not db_item or (item != db_item[0]):
@@ -44,10 +47,12 @@
 
     # Insert or update item in database
     def set_item(self, item: Union[list, tuple]) -> bool:
-        sql = f"UPDATE manifest SET anchor = '{item[0]}', chksum = {item[1]} WHERE anchor = '{item[0]}'"
+        values = [item[0], item[1], item[0]]
+        sql = "UPDATE manifest SET anchor = ?, chksum = ? WHERE anchor = ?"
 
         if not self.item_exists(item):
-            sql = f"INSERT INTO manifest ({self.columns}) VALUES ('{item[0]}', {item[1]})"
+            sql = f"INSERT INTO manifest ({self.columns}) VALUES (?, ?)"
+            # Drop the trailing WHERE value; INSERT only binds anchor and chksum
+            values.pop()
 
-        self.query(sql)
+        self.query(sql, values)
 
         return True
\ No newline at end of file
diff --git a/src/db/flags.py b/src/db/flags.py
deleted file mode 100644
index a555a3e..0000000
--- a/src/db/flags.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from .sqlite import SQLite
-
-class Flags(SQLite):
-    def __init__(self):
-        super().__init__()
-
-        self._columns = ["k", "v"]
-
-    @property
-    def columns(self):
-        return ",".join(self._columns)
-
-    @columns.setter
-    def columns(self, columns: list):
-        self._columns = columns
-
diff --git a/src/db/sqlite.py b/src/db/sqlite.py
index d15e1a4..3666637 100644
--- a/src/db/sqlite.py
+++ b/src/db/sqlite.py
@@ -1,6 +1,7 @@
 import os
 import pathlib
 import sqlite3 as sqlite
+from typing import Iterable
 
 dbname = "._cloudbackup.db"
 
@@ -23,8 +24,8 @@
         return " ".join([s.strip() for s in sql.splitlines()])
 
     # Run SQL query
-    def query(self, sql: str):
-        query = self.cursor.execute(sql)
+    def query(self, sql: str, params: Iterable = ()):
+        query = self.cursor.execute(sql, params)
         self.db.commit()
 
         result = query.fetchall()
@@ -61,8 +62,8 @@
         if envar:
             return envar
 
-        sql = f"SELECT v FROM flags WHERE k = '{key}'"
-        res = self.query(sql)
+        sql = "SELECT v FROM flags WHERE k = ?"
+        res = self.query(sql, [key])
 
         if not res:
             return False
diff --git a/src/fs/fs.py b/src/fs/fs.py
index 00314d9..befb0d6 100644
--- a/src/fs/fs.py
+++ b/src/fs/fs.py
@@ -26,7 +26,8 @@ class FileSystem:
 
     @staticmethod
     def zip(item) -> str:
-        dest = f"{tempfile.gettempdir()}/{str(item[1])}"
+        name = FileSystem.chksum(item[0])
+        dest = f"{tempfile.gettempdir()}/{name}"
 
         # Make a temp zip file of single file or folder
         if file_exists(item[0]):