Mirror of https://codeberg.org/vlw/cloud-backup.git (synced 2025-09-13 17:43:42 +02:00)

Compare commits (2 commits): eee7ffd989, 76c006823e
16 changed files with 338 additions and 62 deletions
.env.example (20 changed lines)

@@ -1,7 +1,15 @@
-SOURCE_FOLDER=
-TARGET_BUCKET=
# (Required) Absolute path to the local folder to back up
SOURCE_FOLDER=""
# (Required) Name of the remote bucket or container
TARGET_BUCKET=""

-# Cloud provider "gcs, aws, azure"
-SERVICE_NAME=
-# Path to service account key file
-SERVICE_KEY=
# (Required) Cloud provider (gcloud, aws, azure)
SERVICE_NAME=""
# (Required) Cloud provider access string or path to key file
SERVICE_KEY=""

# -----------------------------------------------------------

# (Optional) Path to log file and level
LOG_FILE=""
LOG_LEVEL="WARNING"
README.md (76 changed lines)

@@ -1,43 +1,91 @@
# Cloud Backup

Backup and archive ordinary files and folders to Google Cloud, AWS or Azure.

## Why this exists in a world with Rclone (and similar)

This script was created to solve the specific task of archiving a folder to the cloud without read access to the bucket/container and its objects.
Cloud Backup keeps an internal database of second-level files and folders that were successfully uploaded to the cloud.

If a file or folder changes on disk, that specific file or folder is compressed, uploaded to the cloud, and the database is updated. What happens to the object in the bucket is invisible to the program.
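To make the change-detection idea concrete, here is a minimal sketch, assuming (as the database code further down suggests) that each second-level item is tracked by an mtime and a CRC32-style checksum. The `manifest` dict and helper names below are illustrative only, not the project's actual schema or API:

```python
# Illustrative sketch only; the real project keeps its manifest in SQLite (see src/db in the diff below).
import os
import zlib

def crc32_of(name: str) -> int:
    # CRC32 of the item name, in the spirit of what resolve.py maps back and forth
    return zlib.crc32(name.encode("utf-8"))

def needs_backup(path: str, manifest: dict) -> bool:
    """True when the item is new or its mtime/checksum differ from the cached state."""
    state = (int(os.path.getmtime(path)), crc32_of(os.path.basename(path)))
    return manifest.get(path) != state

def mark_uploaded(path: str, manifest: dict) -> None:
    # Only called after a successful upload, so the local cache never runs ahead of the bucket
    manifest[path] = (int(os.path.getmtime(path)), crc32_of(os.path.basename(path)))
```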

## Get started

This program requires Python 3.6 or newer with PIP.

Cloud Backup supports uploading to Google Cloud Storage, Azure Blob Storage and AWS S3.

1. **Clone this repo**

-```
```bash
git clone https://github.com/VictorWesterlund/cloud-backup
```

2. **Install dependencies**

-```
```bash
# Install dependencies for your cloud provider "gcloud", "aws" or "azure". Leaving it empty will install everything
bash install.sh aws

# Or install everything directly with PIP
python3 -m pip install -r requirements.txt
```

3. **Copy environment variables file**

-```
```bash
cp .env.example .env
```

-4. **Edit environment variables**
-Open `.env` with your text editor of choice and fill out these required variables
4. **Edit environment variables in `.env`**
```bash
-# Path to the local folder to back up
-SOURCE_FOLDER=
-# Name of the remote bucket (destination)
-TARGET_BUCKET=
-# Remember to double-slash escape paths on Windows 'E:\\path\\to\\something'

-# Cloud provider (gcs, s3, azure)
-SERVICE_NAME=
-# Path to service account key file
-SERVICE_KEY=
# Absolute path to folder whose contents should be backed up
SOURCE_FOLDER="/home/me/backup/"
# Name of bucket (or "container" in Azure)
TARGET_BUCKET="home_backup"

# Cloud provider. "gcloud", "aws" or "azure"
SERVICE_NAME="aws"
# IAM authentication
# GCS: Path to keyfile or string (GOOGLE_APPLICATION_CREDENTIALS)
# Azure: "Connection string" from the Access Key to the container
# AWS: Access key ID and secret separated by a ";"
SERVICE_KEY="SDJSBADYUAD;asid7sad123ebasdhasnk3dnsai"
```

5. **Run backup script**

-```
```bash
python3 backup.py
```

Second-level files and folders should now start uploading to your destination bucket as zip archives.
Subsequent runs of the `backup.py` script will only upload changed files and folders.
In fact, modified state is cached locally and doesn't request anything from your cloud provider.

----

You can also run `backup.py` on a schedule with CRON or equivalent for your system. No requests will be sent to the cloud unless a file or folder has actually changed.

## More stuff

Here are some additional settings and commands you can try.

### Back up a second-level file
```bash
python3 backup.py file 'relative/path/from/.env'
```

### Resolve CRC32 to path or vice versa
```bash
python3 resolve.py '587374759'
# output: 'hello_world.txt'

python3 resolve.py 'hello_world.txt'
# output: '587374759'
```
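As an illustration of what `resolve.py` translates between, a CRC32 of an item name can be computed with Python's built-in `zlib`. This is an assumption about how the checksums are derived, not the script's actual implementation:

```python
# Assumption: checksums are plain zlib CRC32 values of the item name, printed as decimal strings.
import zlib

def crc32_of(name: str) -> str:
    return str(zlib.crc32(name.encode("utf-8")))

print(crc32_of("hello_world.txt"))  # a decimal CRC32, comparable to the '587374759' example above
```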

### Optional flags in `.env`
```bash
# The following intbool flags can be added to .env to override default behavior
# Their value in this demo is the "default" state

# Archive files and folders before uploading
COMPRESS="1"
```
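An "intbool" flag is simply an environment variable holding "1" or "0". A minimal sketch of reading such a flag, assuming python-dotenv and the COMPRESS default of "1" shown above (the project itself also consults its local database for flags):

```python
# Sketch under assumptions; shows only the .env side of reading an intbool flag.
import os
from dotenv import load_dotenv

load_dotenv()
compress = os.getenv("COMPRESS", "1") == "1"  # "1" enables archiving, "0" disables it
print(f"Compression enabled: {compress}")
```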
backup.py (30 changed lines)

@@ -1,5 +1,31 @@
import sys

-from src import Backup
from src import Backup, file_exists

-Backup().backup_all()
# Back up a single file by path
def file_backup():
    if len(sys.argv) < 3:
        return print("Invalid argument length: Expected path to file or folder")

    path = sys.argv[2]

    if not file_exists(path):
        return print(f"File or folder at '{path}' does not exist")

    return Backup().backup_item(path, False)

def run(method):
    methods = {
        "file": file_backup,
        "all": Backup().backup_all
    }

    if method not in methods:
        return print(f"Invalid argument '{method}'")

    return methods[method]()

if len(sys.argv) > 1:
    run(sys.argv[1])
else:
    run("all")
install.sh (new executable file, 28 lines)

@@ -0,0 +1,28 @@
install () {
    python3 -m pip install $1
}

install python-dotenv

# Install Python libraries for cloud provider
case $1 in
    "gcloud")
        install google-cloud-storage
        ;;

    "azure")
        install azure-storage-blob
        ;;

    "aws")
        install boto3
        ;;

    *) ;;
esac

# Create .env file if it doesn't exist
if [ ! -f ".env" ]; then
    cp .env.example .env
    sed -i "s/SERVICE_NAME=\"\"/SERVICE_NAME=\"$1\"/" .env
fi

requirements.txt

@@ -1,2 +1,4 @@
python-dotenv
google-cloud-storage
azure-storage-blob
boto3
resolve.py (new file, 23 lines)

@@ -0,0 +1,23 @@
import sys

from src import Database

class Resolve(Database):
    def __init__(self):
        super().__init__()

        if self.item_exists(sys.argv[1]):
            self.path_to_chksum()
        else:
            self.chksum_to_path()

    def path_to_chksum(self):
        print("Something")

    def chksum_to_path(self):
        print("Something else")

if len(sys.argv) > 1:
    Resolve()
else:
    print("Invalid argument length: Need at least two")

src/__init__.py

@@ -1,9 +1,24 @@
import os
from dotenv import load_dotenv

from .db import Database, dbname
from .fs import FileSystem, file_exists
from .backup import Backup

# Required environment variables
required_vars = (
    "SOURCE_FOLDER",
    "TARGET_BUCKET",
    "SERVICE_NAME",
    "SERVICE_KEY",
    "LOG_LEVEL"
)

if not file_exists(".env"):
    raise FileNotFoundError("Environment variable file does not exist. Copy '.env.example' to '.env'")

load_dotenv()

# Check that required environment variables are set
if not all(map(lambda var: os.getenv(var), required_vars)):
    raise SystemExit("One or more required environment variables in '.env' have not been set")

src/backup.py

@@ -1,3 +1,6 @@
import os
import logging
from logging.handlers import RotatingFileHandler
from typing import Union

from .cloud import Storage as StorageClient

@@ -7,6 +10,7 @@ from . import dbname
class Backup(FileSystem):
    def __init__(self):
        super().__init__()
        self.enable_logging()

        self.has_change = False

@@ -15,8 +19,39 @@ class Backup(FileSystem):

        self.compress = self.db.get_flag("COMPRESS")

    # Configure logging
    def enable_logging(self):
        self.log = logging.getLogger(__name__)
        self.log.debug("Start console logging")
        log_format = logging.Formatter("[%(asctime)s][%(levelname)s]: %(name)s: %(message)s")

        # Log to console
        log_console = logging.StreamHandler()
        log_console.setLevel(logging.INFO)
        log_console.setFormatter(log_format)

        self.log.addHandler(log_console)

        # Log to file
        log_file_path = os.getenv("LOG_FILE")
        if log_file_path:
            self.log.debug("Start file logging")
            log_file = RotatingFileHandler(
                log_file_path,
                mode = "a",
                maxBytes = 50 * 1024 * 1024,
                backupCount = 5,
                encoding = None,
                delay = False
            )

            log_file.setLevel(os.getenv("LOG_LEVEL"))
            log_file.setFormatter(log_format)

            self.log.addHandler(log_file)

    # Backup a file or folder
-    def backup_item(self, item: Union[list, str]) -> bool:
    def backup_item(self, item: Union[list, str], silent: bool = True) -> bool:
        if isinstance(item, str):
            item = self.get_item(item)

@@ -32,26 +67,36 @@

        self.has_change = True

-        print(f"Uploading: '{item[0]}' ... ", end="")
        self.log.info(f"'{item[0]}': Uploading")
        print(f"⏳ | Uploading: '{item[0]}'", end="\r")

        blob = item
        # Upload as zip archive
        if self.compress:
            self.log.debug(f"'{item[0]}': Compressing")
            blob = FileSystem.zip(blob)

        # Upload to cloud
        if self.cloud.upload(blob):
            self.log.debug(f"'{item[0]}': Uploaded")
            print(f"✅ | Upload successful: '{item[0]}'")
            # Update local database
-            if self.db.set_item(item):
-                print("OK")
-            else:
-                print("OK, but failed to update database")
            if not self.db.set_item(item):
                self.log.warn(f"'{item[0]}': Failed to update database")
                print("⚠️ | Failed to update database")
        else:
-            print("FAILED")
            self.log.error(f"'{item[0]}': {self.cloud.error}")
            print(f"❌ | Upload failed: '{item[0]}'")

        # Remove temp zip
        if self.compress:
            FileSystem.delete(blob)

        # Deprecated: Run when a single item is backed up directly
        if not silent and not self.has_change:
            self.log.info("No changes found")
            print("✅ | Up to date. No changes found")

        return

    # Scan TARGET_FOLDER for files and folders to back up

@@ -61,4 +106,5 @@
            self.backup_item(item)

        if not self.has_change:
-            print("Up to date. No changes found")
            self.log.info("No changes found")
            print("✅ | Up to date. No changes found")

@@ -8,6 +8,8 @@ class Storage:
        self._service = None
        self.service = os.getenv("SERVICE_NAME")

        self.error = None

    @property
    def service(self):
        return self._service

@@ -27,4 +29,7 @@
        return values

    def upload(self, *argv):
-        return self.service.upload(*argv)
        upload = self.service.upload(*argv)
        self.error = self.service.error

        return upload
src/cloud/aws.py (new file, 40 lines)

@@ -0,0 +1,40 @@
import os
import boto3
from botocore.exceptions import ClientError

from ..fs.utils import get_file

class StorageClient:
    def __init__(self):
        self.set_access_key()
        self.client = boto3.client("s3")

        self._error = None

    @property
    def error(self):
        return self._error

    @error.setter
    def error(self, state):
        self._error = state

    # Get IAM user access key and ID
    def set_access_key(self):
        key = os.getenv("SERVICE_KEY").split(";")
        if len(key) != 2:
            self.error = "Invalid AWS service key"
            return False

        os.environ["AWS_ACCESS_KEY_ID"] = key[0]
        os.environ["AWS_SECRET_ACCESS_KEY"] = key[1]

    def upload(self, path: str) -> bool:
        name = get_file(path)

        try:
            resp = self.client.upload_file(path, os.getenv("TARGET_BUCKET"), name)
        except ClientError as e:
            self.error = e
            return False
        return True
src/cloud/azure.py (new file, 31 lines)

@@ -0,0 +1,31 @@
import os
from azure.storage.blob import BlobServiceClient

from ..fs.utils import get_file

class StorageClient:
    def __init__(self):
        self.client = BlobServiceClient.from_connection_string(os.getenv("SERVICE_KEY"))

        self._error = None

    @property
    def error(self):
        return self._error

    @error.setter
    def error(self, state):
        self._error = state

    def upload(self, path: str) -> bool:
        name = get_file(path)
        blob = self.client.get_blob_client(container=os.getenv("TARGET_BUCKET"), blob=name)

        try:
            with open(path, "rb") as f:
                blob.upload_blob(f, overwrite=True)
            return True
        except Exception as e:
            if e.response.status_code == 403:
                self.error = "Azure: Access key invalid or lacking required permissions"
            return False

@@ -11,10 +11,21 @@ class StorageClient:
        client = storage.Client()
        self.bucket = client.bucket(self.get_bucket())

        self._error = None

    @property
    def error(self):
        return self._error

    @error.setter
    def error(self, state):
        self._error = state

    def get_bucket(self):
        return os.getenv("TARGET_BUCKET")

    def upload(self, path: str) -> bool:
        self.error = None
        name = get_file(path)
        blob = self.bucket.blob(name)

@@ -22,5 +33,7 @@
            with open(path, "rb") as f:
                blob.upload_from_file(f)
            return True
-        except:
        except Exception as e:
            if e.response.status_code == 403:
                self.error = "GCS: Forbidden: Account lacks 'storage.objects.create' role on target bucket"
            return False

@@ -27,15 +27,18 @@ class Database(SQLite):

    # Check if item exists in the database
    def item_exists(self, item: Union[list, tuple]) -> bool:
-        sql = f"SELECT anchor FROM manifest WHERE anchor = '{item[0]}'"
-        res = self.query(sql)
        sql = "SELECT anchor FROM manifest WHERE anchor = ?"
        res = self.query(sql, (item[0],))

        return res

    # Check if item should be backed up by comparing mtime and checksum
    def check_item(self, item: Union[list, tuple]) -> bool:
-        sql = f"SELECT {self.columns} FROM manifest WHERE anchor = '{item[0]}'"
-        db_item = self.query(sql)
        if os.getenv("FORCE_UPLOAD"):
            return True

        sql = f"SELECT {self.columns} FROM manifest WHERE anchor = ?"
        db_item = self.query(sql, (item[0],))

        # New item or item changed, so back it up
        if not db_item or (item != db_item[0]):

@@ -44,10 +47,12 @@

    # Insert or update item in database
    def set_item(self, item: Union[list, tuple]) -> bool:
-        sql = f"UPDATE manifest SET anchor = '{item[0]}', chksum = {item[1]} WHERE anchor = '{item[0]}'"
        values = [item[0], item[1], item[0]]
        sql = "UPDATE manifest SET anchor = ?, chksum = ? WHERE anchor = ?"

        if not self.item_exists(item):
-            sql = f"INSERT INTO manifest ({self.columns}) VALUES ('{item[0]}', {item[1]})"
-            self.query(sql)
            sql = f"INSERT INTO manifest ({self.columns}) VALUES (?, ?)"
            values.pop()
        self.query(sql, values)

        return True

@@ -1,16 +0,0 @@
-from .sqlite import SQLite
-
-class Flags(SQLite):
-    def __init__(self):
-        super().__init__()
-
-        self._columns = ["k", "v"]
-
-    @property
-    def columns(self):
-        return ",".join(self._columns)
-
-    @columns.setter
-    def columns(self, columns: list):
-        self._columns = columns
-
@@ -1,6 +1,7 @@
import os
import pathlib
import sqlite3 as sqlite
from typing import Iterable

dbname = "._cloudbackup.db"

@@ -23,8 +24,8 @@ class SQLite():
        return " ".join([s.strip() for s in sql.splitlines()])

    # Run SQL query
-    def query(self, sql: str):
-        query = self.cursor.execute(sql)
    def query(self, sql: str, params: Iterable = ()):
        query = self.cursor.execute(sql, params)
        self.db.commit()

        result = query.fetchall()

@@ -61,8 +62,8 @@
        if envar:
            return envar

-        sql = f"SELECT v FROM flags WHERE k = '{key}'"
-        res = self.query(sql)
        sql = "SELECT v FROM flags WHERE k = ?"
        res = self.query(sql, [key])

        if not res:
            return False
@@ -26,7 +26,8 @@ class FileSystem:

    @staticmethod
    def zip(item) -> str:
-        dest = f"{tempfile.gettempdir()}/{str(item[1])}"
        name = FileSystem.chksum(item[0])
        dest = f"{tempfile.gettempdir()}/{name}"

        # Make a temp zip file of single file or folder
        if file_exists(item[0]):