Version 1.0 (#3)

* wip(22w9a): sql param fix

* wip(22w9b): add azure

* wip(22w9c): add single item backup

* wip(22w10a): add logger

* wip(22w11a): add aws support

* Update README.md
Victor Westerlund 2022-03-15 07:06:54 -08:00 committed by GitHub
parent 247e6732bf
commit 76c006823e
16 changed files with 334 additions and 62 deletions


@ -1,7 +1,15 @@
SOURCE_FOLDER=
TARGET_BUCKET=
# (Required) Absolute path to the local folder to back up
SOURCE_FOLDER=""
# (Required) Name of the remote bucket or container
TARGET_BUCKET=""
# Cloud provider "gcs, aws, azure"
SERVICE_NAME=
# Path to service account key file
SERVICE_KEY=
# (Required) Cloud provider (gcloud, aws, azure)
SERVICE_NAME=""
# (Required) Cloud provider access string or path to key file
SERVICE_KEY=""
# -----------------------------------------------------------
# (Optional) Path to log file and level
LOG_FILE=""
LOG_LEVEL="WARNING"


@ -1,43 +1,87 @@
# Cloud Backup
Backup and archive ordinary files and folders to Google Cloud, AWS or Azure.
## Why this exists in a world with Rclone (and similar)
This script was created to solve the specific task of archiving a folder to the cloud without read access to the bucket/container and its objects.
Cloud Backup keeps an internal database of second-level files and folders that were successfully uploaded to the cloud.
If a file or folder changes on disk, that specific file or folder is compressed, uploaded to the cloud, and the database is updated (see the sketch below). What happens to the object in the bucket is invisible to the program.
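The change check happens entirely against that local database. Below is a minimal sketch of the idea, reusing the `manifest` table and `anchor`/`chksum` column names from this commit; the helper itself is hypothetical and simplified (the real program also compares mtime and handles folders).
```python
import sqlite3
import zlib

def crc32_of(path: str) -> int:
    # Cheap fingerprint of the file's current contents
    with open(path, "rb") as f:
        return zlib.crc32(f.read())

def needs_backup(db: sqlite3.Connection, path: str) -> bool:
    # Compare the checksum on disk with the one stored after the last successful upload
    row = db.execute("SELECT chksum FROM manifest WHERE anchor = ?", (path,)).fetchone()
    return row is None or row[0] != crc32_of(path)
```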
## Get started
This program requires Python 3.6 or newer with pip.
Cloud Backup supports uploading to Google Cloud Storage, Azure Blob Storage and AWS S3.
1. **Clone this repo**
```
```bash
git clone https://github.com/VictorWesterlund/cloud-backup
```
2. **Install dependencies**
```
```bash
python3 -m pip install -r requirements.txt
```
3. **Copy environment variables file**
```
```bash
cp .env.example .env
```
4. **Edit environment variables**
Open `.env` with your text editor of choice and fill out these required variables
4. **Edit environment variables in `.env`**
```bash
# Path to the local folder to back up
SOURCE_FOLDER=
# Name of the remote bucket (destination)
TARGET_BUCKET=
# Remember to double-slash escape paths on Windows 'E:\\path\\to\\something'
# Cloud provider (gcs, s3, azure)
SERVICE_NAME=
# Path to service account key file
SERVICE_KEY=
# Absolute path to folder whose contents should be backed up
SOURCE_FOLDER="/home/me/backup/"
# Name of bucket (or "container" in Azure)
TARGET_BUCKET="home_backup"
# Cloud provider. "gloud", "aws" or "azure"
SERVICE_NAME="aws"
# IAM authentication
# GCS: Path to keyfile or string (GOOGLE_APPLICATION_CREDENTIALS)
# Azure: "Connection string" from the Access Key to the container
# AWS: Access key ID and secret seperated by a ";"
SERVICE_KEY="SDJSBADYUAD;asid7sad123ebasdhasnk3dnsai"
```
5. **Run backup script**
```
```bash
python3 backup.py
```
Second-level files and folders should now start uploading to your destination bucket as zip archives.
Subsequent runs of the `backup.py` script will only upload changed files and folders.
In fact, the modified state is cached locally, so subsequent runs don't request anything from your cloud provider.
----
You can also run `backup.py` on a schedule with cron or an equivalent scheduler for your system. No requests will be sent to the cloud unless a file or folder has actually changed.
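For example, a nightly crontab entry could look like this (the clone path is just a placeholder):
```bash
# m h dom mon dow  command
0 3 * * * cd /path/to/cloud-backup && python3 backup.py
```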
## More stuff
Here are some additional settings and commands you can try:
### Back up a second-level file
```bash
python3 backup.py file 'relative/path/from/.env'
```
### Resolve CRC32 to path or vice versa
```bash
python3 resolve.py '587374759'
# output: 'hello_world.txt'
python3 resolve.py 'hello_world.txt'
# output: '587374759'
```
### Optional flags in `.env`
```bash
# The following integer-boolean ("intbool") flags can be added to .env to override default behavior
# The values shown here are the defaults
# Archive files and folders before uploading
COMPRESS="1"
```


@ -1,5 +1,31 @@
import sys
from src import Backup
from src import Backup, file_exists
Backup().backup_all()
# Back up a single file by path
def file_backup():
    if len(sys.argv) < 3:
        return print("Invalid argument length: Expected path to file or folder")

    path = sys.argv[2]
    if not file_exists(path):
        return print(f"File or folder at '{path}' does not exist")

    return Backup().backup_item(path, False)

def run(method):
    methods = {
        "file": file_backup,
        "all" : Backup().backup_all
    }

    if method not in methods:
        return print(f"Invalid argument '{method}'")

    return methods[method]()

if len(sys.argv) > 1:
    run(sys.argv[1])
else:
    run("all")

install.sh Executable file (+28)

@ -0,0 +1,28 @@
install () {
    python3 -m pip install $1
}

install python-dotenv

# Install Python libraries for cloud provider
case $1 in
    "gcs")
        install google-cloud-storage
        ;;
    "azure")
        install azure-storage-blob
        ;;
    "aws")
        install boto3
        ;;
    *) ;;
esac

# Create .env file if it doesn't exist
if [ ! -f ".env" ]; then
    cp .env.example .env
    sed -i "s/SERVICE_NAME=\"\"/SERVICE_NAME=\"$1\"/" .env
fi


@ -1,2 +1,4 @@
python-dotenv
google-cloud-storage
google-cloud-storage
azure-storage-blob
boto3

resolve.py Normal file (+23)

@ -0,0 +1,23 @@
import sys

from src import Database

class Resolve(Database):
    def __init__(self):
        super().__init__()

        if self.item_exists(sys.argv[1]):
            self.path_to_chksum()
        else:
            self.chksum_to_path()

    def path_to_chksum(self):
        print("Something")

    def chksum_to_path(self):
        print("Something else")

if len(sys.argv) > 1:
    Resolve()
else:
    print("Invalid argument length: Need at least two")


@ -1,9 +1,24 @@
import os
from dotenv import load_dotenv
from .db import Database, dbname
from .fs import FileSystem, file_exists
from .backup import Backup
# Required environment variables
required_vars = (
"SOURCE_FOLDER",
"TARGET_BUCKET",
"SERVICE_NAME",
"SERVICE_KEY",
"LOG_LEVEL"
)
if not file_exists(".env"):
raise FileNotFoundError("Environment variable file does not exist. Copy '.env.example' to '.env'")
load_dotenv()
load_dotenv()
# Check that required environment variables are set
if not all(map(lambda var: os.getenv(var), required_vars)):
raise SystemExit("One or more required environment variables in '.env' have not been set")


@ -1,3 +1,6 @@
import os
import logging
from logging.handlers import RotatingFileHandler
from typing import Union
from .cloud import Storage as StorageClient
@ -7,6 +10,7 @@ from . import dbname
class Backup(FileSystem):
def __init__(self):
super().__init__()
self.enable_logging()
self.has_change = False
@ -15,8 +19,39 @@ class Backup(FileSystem):
self.compress = self.db.get_flag("COMPRESS")
# Configure logging
def enable_logging(self):
self.log = logging.getLogger(__name__)
self.log.debug("Start console logging")
log_format = logging.Formatter("[%(asctime)s][%(levelname)s]: %(name)s: %(message)s")
# Log to console
log_console = logging.StreamHandler()
log_console.setLevel(logging.INFO)
log_console.setFormatter(log_format)
self.log.addHandler(log_console)
# Log to file
log_file_path = os.getenv("LOG_FILE")
if log_file_path:
self.log.debug("Start file logging")
log_file = RotatingFileHandler(
log_file_path,
mode = "a",
maxBytes = 50 * 1024 * 1024,
backupCount = 5,
encoding = None,
delay = False
)
log_file.setLevel(os.getenv("LOG_LEVEL"))
log_file.setFormatter(log_format)
self.log.addHandler(log_file)
# Backup a file or folder
def backup_item(self, item: Union[list, str]) -> bool:
def backup_item(self, item: Union[list, str], silent: bool = True) -> bool:
if isinstance(item, str):
item = self.get_item(item)
@ -32,26 +67,36 @@ class Backup(FileSystem):
self.has_change = True
print(f"Uploading: '{item[0]}' ... ", end="")
self.log.info(f"'{item[0]}': Uploading")
print(f"⏳ | Uploading: '{item[0]}'", end="\r")
blob = item
# Upload as zip archive
if self.compress:
self.log.debug(f"'{item[0]}': Compressing")
blob = FileSystem.zip(blob)
# Upload to cloud
if self.cloud.upload(blob):
self.log.debug(f"'{item[0]}': Uploaded")
print(f"✅ | Upload successful: '{item[0]}'")
# Update local database
if self.db.set_item(item):
print("OK")
else:
print("OK, but failed to update database")
if not self.db.set_item(item):
self.log.warning(f"'{item[0]}': Failed to update database")
print("⚠️ | Failed to update database")
else:
print("FAILED")
self.log.error(f"'{item[0]}': {self.cloud.error}")
print(f"❌ | Upload failed: '{item[0]}'")
# Remove temp zip
if self.compress:
FileSystem.delete(blob)
# Deprecated: Run when a single item is backed up directly
if not silent and not self.has_change:
self.log.info("No changes found")
print("✅ | Up to date. No changes found")
return
# Scan TARGET_FOLDER for files and folders to back up
@ -61,4 +106,5 @@ class Backup(FileSystem):
self.backup_item(item)
if not self.has_change:
print("Up to date. No changes found")
self.log.info("No changes found")
print("✅ | Up to date. No changes found")


@ -8,6 +8,8 @@ class Storage:
self._service = None
self.service = os.getenv("SERVICE_NAME")
self.error = None
@property
def service(self):
return self._service
@ -27,4 +29,7 @@ class Storage:
return values
def upload(self, *argv):
return self.service.upload(*argv)
upload = self.service.upload(*argv)
self.error = self.service.error
return upload

src/cloud/aws.py Normal file (+40)

@ -0,0 +1,40 @@
import os

import boto3
from botocore.exceptions import ClientError

from ..fs.utils import get_file

class StorageClient:
    def __init__(self):
        self._error = None
        # Export credentials before the boto3 client is created
        self.set_access_key()
        self.client = boto3.client("s3")

    @property
    def error(self):
        return self._error

    @error.setter
    def error(self, state):
        self._error = state

    # Get IAM user access key ID and secret from SERVICE_KEY ("<id>;<secret>")
    def set_access_key(self):
        key = os.getenv("SERVICE_KEY").split(";")
        if len(key) != 2:
            self.error = "Invalid AWS service key"
            return False

        # botocore reads these exact (uppercase) environment variables
        os.environ["AWS_ACCESS_KEY_ID"] = key[0]
        os.environ["AWS_SECRET_ACCESS_KEY"] = key[1]

    def upload(self, path: str) -> bool:
        name = get_file(path)
        try:
            self.client.upload_file(path, os.getenv("TARGET_BUCKET"), name)
        except ClientError as e:
            self.error = e
            return False
        return True

src/cloud/azure.py Normal file (+31)

@ -0,0 +1,31 @@
import os

from azure.storage.blob import BlobServiceClient

from ..fs.utils import get_file

class StorageClient:
    def __init__(self):
        self.client = BlobServiceClient.from_connection_string(os.getenv("SERVICE_KEY"))
        self._error = None

    @property
    def error(self):
        return self._error

    @error.setter
    def error(self, state):
        self._error = state

    def upload(self, path: str) -> bool:
        name = get_file(path)
        blob = self.client.get_blob_client(container=os.getenv("TARGET_BUCKET"), blob=name)
        try:
            with open(path, "rb") as f:
                blob.upload_blob(f, overwrite=True)
            return True
        except Exception as e:
            # azure.core's HttpResponseError exposes the HTTP status code;
            # getattr keeps non-HTTP exceptions from raising here
            if getattr(e, "status_code", None) == 403:
                self.error = "Azure: Access key invalid or lacking required permissions"
            return False


@ -11,10 +11,21 @@ class StorageClient:
client = storage.Client()
self.bucket = client.bucket(self.get_bucket())
self._error = None
@property
def error(self):
return self._error
@error.setter
def error(self, state):
self._error = state
def get_bucket(self):
return os.getenv("TARGET_BUCKET")
def upload(self, path: str) -> bool:
self.error = None
name = get_file(path)
blob = self.bucket.blob(name)
@ -22,5 +33,7 @@ class StorageClient:
with open(path, "rb") as f:
blob.upload_from_file(f)
return True
except:
except Exception as e:
if e.response.status_code == 403:
self.error = "GCS: Forbidden: Account lacks 'storage.objects.create' role on target bucket"
return False


@ -27,15 +27,18 @@ class Database(SQLite):
# Check if item exists in the database
def item_exists(self, item: Union[list, tuple]) -> bool:
sql = f"SELECT anchor FROM manifest WHERE anchor = '{item[0]}'"
res = self.query(sql)
sql = "SELECT anchor FROM manifest WHERE anchor = ?"
res = self.query(sql, (item[0],))
return res
# Check if item should be backed up by comparing mtime and checksum
def check_item(self, item: Union[list, tuple]) -> bool:
sql = f"SELECT {self.columns} FROM manifest WHERE anchor = '{item[0]}'"
db_item = self.query(sql)
if os.getenv("FORCE_UPLOAD"):
return True
sql = f"SELECT {self.columns} FROM manifest WHERE anchor = ?"
db_item = self.query(sql, (item[0],))
# New item or item changed, so back it up
if not db_item or (item != db_item[0]):
@ -44,10 +47,12 @@ class Database(SQLite):
# Insert or update item in database
def set_item(self, item: Union[list, tuple]) -> bool:
sql = f"UPDATE manifest SET anchor = '{item[0]}', chksum = {item[1]} WHERE anchor = '{item[0]}'"
values = [item[0], item[1], item[0]]
sql = "UPDATE manifest SET anchor = ?, chksum = ? WHERE anchor = ?"
if not self.item_exists(item):
sql = f"INSERT INTO manifest ({self.columns}) VALUES ('{item[0]}', {item[1]})"
self.query(sql)
sql = f"INSERT INTO manifest ({self.columns}) VALUES (?, ?)"
values.pop()
self.query(sql, values)
return True


@ -1,16 +0,0 @@
from .sqlite import SQLite
class Flags(SQLite):
def __init__(self):
super().__init__()
self._columns = ["k", "v"]
@property
def columns(self):
return ",".join(self._columns)
@columns.setter
def columns(self, columns: list):
self._columns = columns


@ -1,6 +1,7 @@
import os
import pathlib
import sqlite3 as sqlite
from typing import Iterable
dbname = "._cloudbackup.db"
@ -23,8 +24,8 @@ class SQLite():
return " ".join([s.strip() for s in sql.splitlines()])
# Run SQL query
def query(self, sql: str):
query = self.cursor.execute(sql)
def query(self, sql: str, params: Iterable = ()):
query = self.cursor.execute(sql, params)
self.db.commit()
result = query.fetchall()
@ -61,8 +62,8 @@ class SQLite():
if envar:
return envar
sql = f"SELECT v FROM flags WHERE k = '{key}'"
res = self.query(sql)
sql = "SELECT v FROM flags WHERE k = ?"
res = self.query(sql, [key])
if not res:
return False


@ -26,7 +26,8 @@ class FileSystem:
@staticmethod
def zip(item) -> str:
dest = f"{tempfile.gettempdir()}/{str(item[1])}"
name = FileSystem.chksum(item[0])
dest = f"{tempfile.gettempdir()}/{name}"
# Make a temp zip file of single file or folder
if file_exists(item[0]):