Compare commits

master...0.1.0

No commits in common. "master" and "0.1.0" have entirely different histories.

16 changed files with 62 additions and 338 deletions

.env.example

@@ -1,15 +1,7 @@
-# (Required) Absolute path to the local folder to back up
-SOURCE_FOLDER=""
-# (Required) Name of the remote bucket or container
-TARGET_BUCKET=""
-# (Required) Cloud provider (gcloud, aws, azure)
-SERVICE_NAME=""
-# (Required) Cloud provider access string or path to key file
-SERVICE_KEY=""
-# -----------------------------------------------------------
-# (Optional) Path to log file and level
-LOG_FILE=""
-LOG_LEVEL="WARNING"
+SOURCE_FOLDER=
+TARGET_BUCKET=
+# Cloud provider "gcs, aws, azure"
+SERVICE_NAME=
+# Path to service account key file
+SERVICE_KEY=
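These variables are read at startup with python-dotenv, as the src/__init__.py hunk later in this diff shows. A minimal sketch of that consumption:

```python
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory
source = os.getenv("SOURCE_FOLDER")
bucket = os.getenv("TARGET_BUCKET")
```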

README.md

@@ -1,91 +1,43 @@
 # Cloud Backup
 Backup and archive ordinary files and folders to Google Cloud, AWS or Azure.
-## Why this exists in a world with Rclone (and similar)
-This script was created to solve the specific task of archiving a folder to the cloud without read access to the bucket/container and its objects.
-Cloud Backup keeps an internal database of second-level files and folders which were successfully uploaded to the cloud.
-If a file or folder changes on disk, that specific file or folder is compressed, uploaded to the cloud, and the database gets updated. What happens to the object in the bucket is invisible to the program.
 ## Get started
 This program requires Python 3.6 or newer with PIP.
-Cloud Backup supports uploading to Google Cloud Storage, Azure Blob Storage and AWS S3.
 1. **Clone this repo**
-   ```bash
+   ```
    git clone https://github.com/VictorWesterlund/cloud-backup
    ```
 2. **Install dependencies**
-   ```bash
-   # Install dependencies for your cloud provider "gcloud", "aws" or "azure". Leaving it empty will install everything
-   bash install.sh aws
-   # Or install everything directly with PIP
+   ```
    python3 -m pip install -r requirements.txt
    ```
 3. **Copy environment variables file**
-   ```bash
+   ```
    cp .env.example .env
    ```
-4. **Edit environment variables in `.env`**
-   Open `.env` with your text editor of choice and fill out these required variables
+4. **Edit environment variables**
    ```bash
-   # Remember to double-slash escape paths on Windows 'E:\\path\\to\\something'
-   # Absolute path to folder whose contents should be backed up
-   SOURCE_FOLDER="/home/me/backup/"
-   # Name of bucket (or "container" in Azure)
-   TARGET_BUCKET="home_backup"
-   # Cloud provider. "gcloud", "aws" or "azure"
-   SERVICE_NAME="aws"
-   # IAM authentication
-   # GCS: Path to keyfile or string (GOOGLE_APPLICATION_CREDENTIALS)
-   # Azure: "Connection string" from the Access Key to the container
-   # AWS: Access key ID and secret separated by a ";"
-   SERVICE_KEY="SDJSBADYUAD;asid7sad123ebasdhasnk3dnsai"
+   # Path to the local folder to back up
+   SOURCE_FOLDER=
+   # Name of the remote bucket (destination)
+   TARGET_BUCKET=
+   # Cloud provider (gcs, s3, azure)
+   SERVICE_NAME=
+   # Path to service account key file
+   SERVICE_KEY=
    ```
 5. **Run backup script**
-   ```bash
+   ```
    python3 backup.py
    ```
 Second-level files and folders should now start uploading to your destination bucket as zip archives.
 Subsequent runs of the `backup.py` script will only upload changed files and folders.
 In fact, modified state is cached locally and doesn't request anything from your cloud provider.
-----
-You can also run `backup.py` on a schedule with CRON or equivalent for your system. No requests will be sent to the cloud unless a file or folder has actually changed.
-## More stuff
-Here are some additional settings and commands you can try
-### Back up a second-level file
-```bash
-python3 backup.py file 'relative/path/from/.env'
-```
-### Resolve CRC32 to path or vice versa
-```bash
-python3 resolve.py '587374759'
-# output: 'hello_world.txt'
-python3 resolve.py 'hello_world.txt'
-# output: '587374759'
-```
-### Optional flags in `.env`
-```bash
-# The following intbool flags can be added to .env to override default behavior
-# Their value in this demo is the "default" state
-# Archive files and folders before uploading
-COMPRESS="1"
-```
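The "internal database" described in the deleted README section boils down to comparing a stored checksum against each item's current state on disk; resolve.py maps CRC32 values to paths and back. A minimal sketch of that change-detection idea, assuming CRC32 over file contents (helper names here are illustrative, not the project's API):

```python
import zlib

# Illustrative helper: CRC32 of a file's contents
def crc32_of(path: str) -> int:
    crc = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            crc = zlib.crc32(chunk, crc)
    return crc

# Re-upload only when the stored checksum no longer matches the disk state,
# so an unchanged folder costs zero cloud requests
def needs_backup(path: str, stored_crc: int) -> bool:
    return crc32_of(path) != stored_crc
```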

backup.py

@@ -1,31 +1,5 @@
 import sys
-from src import Backup, file_exists
-# Back up a single file by path
-def file_backup():
-    if len(sys.argv) < 3:
-        return print("Invalid argument length: Expected path to file or folder")
-    path = sys.argv[2]
-    if not file_exists(path):
-        return print(f"File or folder at '{path}' does not exist")
-    return Backup().backup_item(path, False)
-def run(method):
-    methods = {
-        "file": file_backup,
-        "all": Backup().backup_all
-    }
-    if method not in methods:
-        return print(f"Invalid argument '{method}'")
-    return methods[method]()
-if len(sys.argv) > 1:
-    run(sys.argv[1])
-else:
-    run("all")
+from src import Backup
+Backup().backup_all()

install.sh

@@ -1,28 +0,0 @@
-install () {
-    python3 -m pip install $1
-}
-install python-dotenv
-# Install Python libraries for cloud provider
-case $1 in
-    "gcloud")
-        install google-cloud-storage
-        ;;
-    "azure")
-        install azure-storage-blob
-        ;;
-    "aws")
-        install boto3
-        ;;
-    *)
-        # Default (no argument): install everything, as documented in the README
-        python3 -m pip install -r requirements.txt
-        ;;
-esac
-# Create .env file if it doesn't exist
-if [ ! -f ".env" ]; then
-    cp .env.example .env
-    sed -i "s/SERVICE_NAME=\"\"/SERVICE_NAME=\"$1\"/" .env
-fi

requirements.txt

@@ -1,4 +1,2 @@
 python-dotenv
 google-cloud-storage
-azure-storage-blob
-boto3

resolve.py

@@ -1,23 +0,0 @@
-import sys
-from src import Database
-# Resolve a CRC32 checksum to a relative path, or vice versa
-class Resolve(Database):
-    def __init__(self):
-        super().__init__()
-        # item_exists() expects a sequence whose first element is the path
-        if self.item_exists([sys.argv[1]]):
-            self.path_to_chksum()
-        else:
-            self.chksum_to_path()
-    def path_to_chksum(self):
-        res = self.query("SELECT chksum FROM manifest WHERE anchor = ?", (sys.argv[1],))
-        print(res[0][0] if res else "Not found")
-    def chksum_to_path(self):
-        res = self.query("SELECT anchor FROM manifest WHERE chksum = ?", (sys.argv[1],))
-        print(res[0][0] if res else "Not found")
-if len(sys.argv) > 1:
-    Resolve()
-else:
-    print("Invalid argument length: Expected a path or CRC32 checksum")

src/__init__.py

@@ -1,24 +1,9 @@
-import os
 from dotenv import load_dotenv
 from .db import Database, dbname
 from .fs import FileSystem, file_exists
 from .backup import Backup
-# Required environment variables
-required_vars = (
-    "SOURCE_FOLDER",
-    "TARGET_BUCKET",
-    "SERVICE_NAME",
-    "SERVICE_KEY",
-    "LOG_LEVEL"
-)
 if not file_exists(".env"):
     raise FileNotFoundError("Environment variable file does not exist. Copy '.env.example' to '.env'")
 load_dotenv()
-# Check that required environment variables are set
-if not all(map(lambda var: os.getenv(var), required_vars)):
-    raise SystemExit("One or more required environment variables in '.env' have not been set")

src/backup.py

@@ -1,6 +1,3 @@
-import os
-import logging
-from logging.handlers import RotatingFileHandler
 from typing import Union
 from .cloud import Storage as StorageClient
@@ -10,7 +7,6 @@ from . import dbname
 class Backup(FileSystem):
     def __init__(self):
         super().__init__()
-        self.enable_logging()
         self.has_change = False
@@ -19,39 +15,8 @@ class Backup(FileSystem):
         self.compress = self.db.get_flag("COMPRESS")
-    # Configure logging
-    def enable_logging(self):
-        self.log = logging.getLogger(__name__)
-        self.log.debug("Start console logging")
-        log_format = logging.Formatter("[%(asctime)s][%(levelname)s]: %(name)s: %(message)s")
-        # Log to console
-        log_console = logging.StreamHandler()
-        log_console.setLevel(logging.INFO)
-        log_console.setFormatter(log_format)
-        self.log.addHandler(log_console)
-        # Log to file
-        log_file_path = os.getenv("LOG_FILE")
-        if log_file_path:
-            self.log.debug("Start file logging")
-            log_file = RotatingFileHandler(
-                log_file_path,
-                mode = "a",
-                maxBytes = 50 * 1024 * 1024,
-                backupCount = 5,
-                encoding = None,
-                delay = False
-            )
-            log_file.setLevel(os.getenv("LOG_LEVEL"))
-            log_file.setFormatter(log_format)
-            self.log.addHandler(log_file)
     # Backup a file or folder
-    def backup_item(self, item: Union[list, str], silent: bool = True) -> bool:
+    def backup_item(self, item: Union[list, str]) -> bool:
         if isinstance(item, str):
             item = self.get_item(item)
@@ -67,36 +32,26 @@ class Backup(FileSystem):
         self.has_change = True
-        self.log.info(f"'{item[0]}': Uploading")
-        print(f"⏳ | Uploading: '{item[0]}'", end="\r")
+        print(f"Uploading: '{item[0]}' ... ", end="")
         blob = item
         # Upload as zip archive
         if self.compress:
-            self.log.debug(f"'{item[0]}': Compressing")
             blob = FileSystem.zip(blob)
         # Upload to cloud
         if self.cloud.upload(blob):
-            self.log.debug(f"'{item[0]}': Uploaded")
-            print(f"✅ | Upload successful: '{item[0]}'")
             # Update local database
-            if not self.db.set_item(item):
-                self.log.warning(f"'{item[0]}': Failed to update database")
-                print("⚠️ | Failed to update database")
+            if self.db.set_item(item):
+                print("OK")
+            else:
+                print("OK, but failed to update database")
         else:
-            self.log.error(f"'{item[0]}': {self.cloud.error}")
-            print(f"❌ | Upload failed: '{item[0]}'")
+            print("FAILED")
         # Remove temp zip
         if self.compress:
             FileSystem.delete(blob)
-        # Deprecated: Run when a single item is backed up directly
-        if not silent and not self.has_change:
-            self.log.info("No changes found")
-            print("✅ | Up to date. No changes found")
         return
     # Scan SOURCE_FOLDER for files and folders to back up
@@ -106,5 +61,4 @@ class Backup(FileSystem):
         self.backup_item(item)
         if not self.has_change:
-            self.log.info("No changes found")
-            print("✅ | Up to date. No changes found")
+            print("Up to date. No changes found")

src/cloud/__init__.py

@@ -8,8 +8,6 @@ class Storage:
         self._service = None
         self.service = os.getenv("SERVICE_NAME")
-        self.error = None
     @property
     def service(self):
         return self._service
@@ -29,7 +27,4 @@ class Storage:
         return values
     def upload(self, *argv):
-        upload = self.service.upload(*argv)
-        self.error = self.service.error
-        return upload
+        return self.service.upload(*argv)
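The `service` property setter that turns SERVICE_NAME into a concrete client is elided between these two hunks. A plausible reconstruction (an assumption, not the repository's actual code) is a dynamic import keyed on the provider name:

```python
import importlib

class Storage:
    def __init__(self, name: str):
        self._service = None
        self.service = name

    @property
    def service(self):
        return self._service

    @service.setter
    def service(self, name: str):
        # Assumed layout: src/cloud/<name>.py exposing a StorageClient class,
        # so SERVICE_NAME="aws" would resolve to the boto3-backed client below
        module = importlib.import_module(f"src.cloud.{name}")
        self._service = module.StorageClient()
```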

src/cloud (AWS StorageClient)

@@ -1,40 +0,0 @@
-import os
-import boto3
-from botocore.exceptions import ClientError
-from ..fs.utils import get_file
-class StorageClient:
-    def __init__(self):
-        self._error = None
-        self.set_access_key()
-        self.client = boto3.client("s3")
-    @property
-    def error(self):
-        return self._error
-    @error.setter
-    def error(self, state):
-        self._error = state
-    # Get IAM user access key ID and secret from SERVICE_KEY ("ID;secret")
-    def set_access_key(self):
-        key = os.getenv("SERVICE_KEY").split(";")
-        if len(key) != 2:
-            self.error = "Invalid AWS service key"
-            return False
-        # boto3 reads these uppercase variable names from the environment
-        os.environ["AWS_ACCESS_KEY_ID"] = key[0]
-        os.environ["AWS_SECRET_ACCESS_KEY"] = key[1]
-    def upload(self, path: str) -> bool:
-        name = get_file(path)
-        try:
-            self.client.upload_file(path, os.getenv("TARGET_BUCKET"), name)
-        except ClientError as e:
-            self.error = e
-            return False
-        return True

src/cloud (Azure StorageClient)

@@ -1,31 +0,0 @@
-import os
-from azure.storage.blob import BlobServiceClient
-from ..fs.utils import get_file
-class StorageClient:
-    def __init__(self):
-        self.client = BlobServiceClient.from_connection_string(os.getenv("SERVICE_KEY"))
-        self._error = None
-    @property
-    def error(self):
-        return self._error
-    @error.setter
-    def error(self, state):
-        self._error = state
-    def upload(self, path: str) -> bool:
-        name = get_file(path)
-        blob = self.client.get_blob_client(container=os.getenv("TARGET_BUCKET"), blob=name)
-        try:
-            with open(path, "rb") as f:
-                blob.upload_blob(f, overwrite=True)
-            return True
-        except Exception as e:
-            # Not every exception carries an HTTP status; guard the lookup
-            if getattr(e, "status_code", None) == 403:
-                self.error = "Azure: Access key invalid or lacking required permissions"
-            return False
View file

@ -11,21 +11,10 @@ class StorageClient:
client = storage.Client() client = storage.Client()
self.bucket = client.bucket(self.get_bucket()) self.bucket = client.bucket(self.get_bucket())
self._error = None
@property
def error(self):
return self._error
@error.setter
def error(self, state):
self._error = state
def get_bucket(self): def get_bucket(self):
return os.getenv("TARGET_BUCKET") return os.getenv("TARGET_BUCKET")
def upload(self, path: str) -> bool: def upload(self, path: str) -> bool:
self.error = None
name = get_file(path) name = get_file(path)
blob = self.bucket.blob(name) blob = self.bucket.blob(name)
@ -33,7 +22,5 @@ class StorageClient:
with open(path, "rb") as f: with open(path, "rb") as f:
blob.upload_from_file(f) blob.upload_from_file(f)
return True return True
except Exception as e: except:
if e.response.status_code == 403:
self.error = "GCS: Forbidden: Account lacks 'storage.objects.create' role on target bucket"
return False return False

src/db/__init__.py

@@ -27,18 +27,15 @@ class Database(SQLite):
     # Check if item exists in the database
     def item_exists(self, item: Union[list, tuple]) -> bool:
-        sql = "SELECT anchor FROM manifest WHERE anchor = ?"
-        res = self.query(sql, (item[0],))
+        sql = f"SELECT anchor FROM manifest WHERE anchor = '{item[0]}'"
+        res = self.query(sql)
         return res
     # Check if item should be backed up by comparing mtime and checksum
     def check_item(self, item: Union[list, tuple]) -> bool:
-        if os.getenv("FORCE_UPLOAD"):
-            return True
-        sql = f"SELECT {self.columns} FROM manifest WHERE anchor = ?"
-        db_item = self.query(sql, (item[0],))
+        sql = f"SELECT {self.columns} FROM manifest WHERE anchor = '{item[0]}'"
+        db_item = self.query(sql)
         # New item or item changed, so back it up
         if not db_item or (item != db_item[0]):
@@ -47,12 +44,10 @@ class Database(SQLite):
     # Insert or update item in database
     def set_item(self, item: Union[list, tuple]) -> bool:
-        values = [item[0], item[1], item[0]]
-        sql = "UPDATE manifest SET anchor = ?, chksum = ? WHERE anchor = ?"
+        sql = f"UPDATE manifest SET anchor = '{item[0]}', chksum = {item[1]} WHERE anchor = '{item[0]}'"
         if not self.item_exists(item):
-            sql = f"INSERT INTO manifest ({self.columns}) VALUES (?, ?)"
-            values.pop()
-        self.query(sql, values)
+            sql = f"INSERT INTO manifest ({self.columns}) VALUES ('{item[0]}', {item[1]})"
+        self.query(sql)
         return True
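The master side of this hunk swaps string-interpolated SQL for qmark placeholders, so a path containing a quote can neither break the statement nor inject SQL. A self-contained illustration of the same update-then-insert pattern (using cursor.rowcount in place of the item_exists() lookup; the schema follows the manifest columns above):

```python
import sqlite3

db = sqlite3.connect(":memory:")
db.execute("CREATE TABLE manifest (anchor TEXT, chksum INTEGER)")

def set_item(item):
    # Parameterized UPDATE first; fall back to INSERT for new rows
    cur = db.execute(
        "UPDATE manifest SET anchor = ?, chksum = ? WHERE anchor = ?",
        (item[0], item[1], item[0]),
    )
    if cur.rowcount == 0:
        db.execute("INSERT INTO manifest (anchor, chksum) VALUES (?, ?)", item)
    db.commit()

set_item(("hello_world.txt", 587374759))  # path and CRC32 from the README example
```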

src/db/flags.py (new file, 16 additions)

@@ -0,0 +1,16 @@
+from .sqlite import SQLite
+class Flags(SQLite):
+    def __init__(self):
+        super().__init__()
+        self._columns = ["k", "v"]
+    @property
+    def columns(self):
+        return ",".join(self._columns)
+    @columns.setter
+    def columns(self, columns: list):
+        self._columns = columns

src/db/sqlite.py

@@ -1,7 +1,6 @@
 import os
 import pathlib
 import sqlite3 as sqlite
-from typing import Iterable
 dbname = "._cloudbackup.db"
@@ -24,8 +23,8 @@ class SQLite():
         return " ".join([s.strip() for s in sql.splitlines()])
     # Run SQL query
-    def query(self, sql: str, params: Iterable = ()):
-        query = self.cursor.execute(sql, params)
+    def query(self, sql: str):
+        query = self.cursor.execute(sql)
         self.db.commit()
         result = query.fetchall()
@@ -62,8 +61,8 @@ class SQLite():
         if envar:
             return envar
-        sql = "SELECT v FROM flags WHERE k = ?"
-        res = self.query(sql, [key])
+        sql = f"SELECT v FROM flags WHERE k = '{key}'"
+        res = self.query(sql)
         if not res:
             return False
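Visible around this last hunk: get_flag() lets an environment variable override the flags table, which is how a COMPRESS entry in .env takes effect. A condensed sketch of that precedence, assuming the k/v schema from src/db/flags.py:

```python
import os

def get_flag(query, key: str):
    # Environment variable wins over the database row
    envar = os.getenv(key)
    if envar:
        return envar
    res = query("SELECT v FROM flags WHERE k = ?", [key])
    return res[0][0] if res else False
```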

src/fs/__init__.py

@@ -26,8 +26,7 @@ class FileSystem:
     @staticmethod
     def zip(item) -> str:
-        name = FileSystem.chksum(item[0])
-        dest = f"{tempfile.gettempdir()}/{name}"
+        dest = f"{tempfile.gettempdir()}/{str(item[1])}"
         # Make a temp zip file of single file or folder
         if file_exists(item[0]):