mirror of
https://codeberg.org/vlw/cloud-backup.git
synced 2025-09-14 01:53:42 +02:00
pre-v1.0 (#1)
* wip(22w8a): add wip db and gcs client * wip(22w8b): bootstrapping fix * wip(22w8c): add first-run sql config * wip(22w8d): add sqlite abstraction * wip(22w8e): add filesystem handler * wip(22w8f): add folder walker * wip(22w8a): finish db writer * wip(22w9a): add item zipper * wip(22w9b): add gcs upload * Create README.md Co-authored-by: Cloud Shell <cloud-shell@victor-westerlund.iam.gserviceaccount.com>
This commit is contained in:
parent
9071d6d9fe
commit
247e6732bf
17 changed files with 468 additions and 0 deletions
7
.env.example
Normal file
7
.env.example
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
SOURCE_FOLDER=
|
||||||
|
TARGET_BUCKET=
|
||||||
|
|
||||||
|
# Cloud provider "gcs, aws, azure"
|
||||||
|
SERVICE_NAME=
|
||||||
|
# Path to service account key file
|
||||||
|
SERVICE_KEY=
|
52
.gitignore
vendored
Normal file
52
.gitignore
vendored
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
# Bootstrapping #
|
||||||
|
#################
|
||||||
|
/node_modules
|
||||||
|
/public/hot
|
||||||
|
/public/storage
|
||||||
|
/storage/*.key
|
||||||
|
/vendor
|
||||||
|
.env
|
||||||
|
.env.backup
|
||||||
|
.phpunit.result.cache
|
||||||
|
Homestead.json
|
||||||
|
Homestead.yaml
|
||||||
|
npm-debug.log
|
||||||
|
yarn-error.log
|
||||||
|
public/robots.txt
|
||||||
|
__pycache__
|
||||||
|
*.pyc
|
||||||
|
|
||||||
|
# OS generated files #
|
||||||
|
######################
|
||||||
|
.DS_Store
|
||||||
|
.DS_Store?
|
||||||
|
._*
|
||||||
|
.Spotlight-V100
|
||||||
|
.Trashes
|
||||||
|
Icon?
|
||||||
|
ehthumbs.db
|
||||||
|
Thumbs.db
|
||||||
|
.directory
|
||||||
|
|
||||||
|
# Tool specific files #
|
||||||
|
#######################
|
||||||
|
# vim
|
||||||
|
*~
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
# sublime text & textmate
|
||||||
|
*.sublime-*
|
||||||
|
*.stTheme.cache
|
||||||
|
*.tmlanguage.cache
|
||||||
|
*.tmPreferences.cache
|
||||||
|
# Eclipse
|
||||||
|
.settings/*
|
||||||
|
# JetBrains, aka PHPStorm, IntelliJ IDEA
|
||||||
|
.idea/*
|
||||||
|
# NetBeans
|
||||||
|
nbproject/*
|
||||||
|
# Visual Studio Code
|
||||||
|
.vscode
|
||||||
|
.theia
|
||||||
|
# Sass preprocessor
|
||||||
|
.sass-cache/
|
43
README.md
Normal file
43
README.md
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
# Cloud Backup
|
||||||
|
Backup and archive ordinary files and folders to Google Cloud, AWS or Azure.
|
||||||
|
|
||||||
|
## Get started
|
||||||
|
This program requires Python 3.6 or newer with pip.
|
||||||
|
|
||||||
|
1. **Clone this repo**
|
||||||
|
```
|
||||||
|
git clone https://github.com/VictorWesterlund/cloud-backup
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Install dependencies**
|
||||||
|
```
|
||||||
|
python3 -m pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Copy environment variables file**
|
||||||
|
```
|
||||||
|
cp .env.example .env
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Edit environment variables**
|
||||||
|
Open `.env` with your text editor of choice and fill out these required variables
|
||||||
|
```bash
|
||||||
|
# Path to the local folder to back up
|
||||||
|
SOURCE_FOLDER=
|
||||||
|
# Name of the remote bucket (destination)
|
||||||
|
TARGET_BUCKET=
|
||||||
|
|
||||||
|
# Cloud provider (gcs, s3, azure)
|
||||||
|
SERVICE_NAME=
|
||||||
|
# Path to service account key file
|
||||||
|
SERVICE_KEY=
|
||||||
|
```
|
||||||
|
|
||||||
|
5. **Run backup script**
|
||||||
|
```
|
||||||
|
python3 backup.py
|
||||||
|
```
|
||||||
|
|
||||||
|
Second-level files and folders should now start uploading to your destination bucket as zip archives.
|
||||||
|
Subsequent runs of the `backup.py` script will only upload changed files and folders.
|
||||||
|
In fact, the modified state is cached locally, so checking for changes doesn't request anything from your cloud provider.
|
5
backup.py
Normal file
5
backup.py
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from src import Backup
|
||||||
|
|
||||||
|
# Only start the backup when this file is executed as a script
# (python3 backup.py), so that importing the module has no side effects.
if __name__ == "__main__":
    Backup().backup_all()
|
2
requirements.txt
Normal file
2
requirements.txt
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
python-dotenv
|
||||||
|
google-cloud-storage
|
9
src/__init__.py
Normal file
9
src/__init__.py
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from .db import Database, dbname
|
||||||
|
from .fs import FileSystem, file_exists
|
||||||
|
from .backup import Backup
|
||||||
|
|
||||||
|
# Refuse to start when there is no '.env' file in the current working
# directory; otherwise load its variables into the process environment.
if file_exists(".env"):
    load_dotenv()
else:
    raise FileNotFoundError("Environment variable file does not exist. Copy '.env.example' to '.env'")
|
64
src/backup.py
Normal file
64
src/backup.py
Normal file
|
@ -0,0 +1,64 @@
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
|
from .cloud import Storage as StorageClient
|
||||||
|
from . import Database, FileSystem
|
||||||
|
from . import dbname
|
||||||
|
|
||||||
|
class Backup(FileSystem):
    """Upload changed files and folders to the configured cloud storage.

    Combines the local manifest database (change detection), the optional
    zip step and the cloud storage client.
    """

    def __init__(self):
        super().__init__()

        # Becomes True as soon as one item (other than the database file
        # itself) is found to have changed
        self.has_change = False

        self.db = Database()
        self.cloud = StorageClient()

        # Upload items as zip archives when the COMPRESS flag is set
        self.compress = self.db.get_flag("COMPRESS")

    # Backup a file or folder
    def backup_item(self, item: Union[list, tuple, str]) -> bool:
        """Upload a single item if the manifest says it is new or changed.

        Args:
            item: Either a path string, or the (path, checksum) pair
                produced by get_item().

        Returns:
            True when the item was uploaded, False when it was skipped
            (ignored, unchanged, the database file itself) or the upload
            failed.
        """
        if isinstance(item, str):
            item = self.get_item(item)

        # get_item() returns a falsy value for paths that must be ignored
        if not item:
            return False

        # Check item against db if it has changed
        if not self.db.check_item(item):
            return False

        # Back up changes to database in silence
        if item[0].endswith(dbname):
            self.db.set_item(item)
            return False

        self.has_change = True

        # Flush so the progress text is visible while the upload is running
        print(f"Uploading: '{item[0]}' ... ", end="", flush=True)

        # The storage client expects a path string: either the temp zip
        # archive or the item's own path, never the (path, checksum) pair
        blob = FileSystem.zip(item) if self.compress else item[0]

        try:
            # Upload to cloud
            uploaded = self.cloud.upload(blob)

            if not uploaded:
                print("FAILED")
            # Update local database
            elif self.db.set_item(item):
                print("OK")
            else:
                print("OK, but failed to update database")
        finally:
            # Remove temp zip, also when the upload raised
            if self.compress:
                FileSystem.delete(blob)

        return bool(uploaded)

    # Scan SOURCE_FOLDER for files and folders to back up
    def backup_all(self):
        """Back up every item returned by all() and report if nothing changed."""
        for item in self.all():
            self.backup_item(item)

        if not self.has_change:
            print("Up to date. No changes found")
|
30
src/cloud/__init__.py
Normal file
30
src/cloud/__init__.py
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
import os
|
||||||
|
import importlib
|
||||||
|
|
||||||
|
# This class initializes only the module for the requested service.
|
||||||
|
# It sits as an intermediate between the initiator script and client library.
|
||||||
|
class Storage:
    """Intermediate between the initiator script and one provider client.

    Only the module of the requested service is imported; upload calls
    are forwarded to the StorageClient defined in that module.
    """

    def __init__(self):
        self._service = None
        # Assigning through the property setter creates the client
        self.service = os.getenv("SERVICE_NAME")

    @property
    def service(self):
        """The provider-specific storage client currently in use."""
        return self._service

    # Create a new storage client for the requested service
    @service.setter
    def service(self, service: str):
        """Import the 'src.cloud' submodule named by 'service' and
        instantiate its StorageClient. A falsy name selects "gcs".
        """
        module_name = "src.cloud." + (service or "gcs")
        provider = importlib.import_module(module_name)

        self._service = provider.StorageClient()

    @staticmethod
    def get_args(values):
        """Remove the last element of 'values' in place and return 'values'."""
        values.pop(-1)
        return values

    def upload(self, *argv):
        """Forward all positional arguments to the client's upload()."""
        client = self.service
        return client.upload(*argv)
|
26
src/cloud/gcs.py
Normal file
26
src/cloud/gcs.py
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
import os
|
||||||
|
from google.cloud import storage
|
||||||
|
|
||||||
|
from ..fs.utils import get_file
|
||||||
|
|
||||||
|
# Client for Google Cloud Storage
|
||||||
|
class StorageClient:
    """Client for Google Cloud Storage."""

    def __init__(self):
        # os.environ only accepts strings, so the variable is left untouched
        # when SERVICE_KEY is unset instead of failing with a TypeError.
        # NOTE(review): the client library is then expected to fall back to
        # its default credential lookup - confirm this is acceptable.
        key_file = os.getenv("SERVICE_KEY")
        if key_file:
            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_file

        client = storage.Client()
        self.bucket = client.bucket(self.get_bucket())

    def get_bucket(self):
        """Return the bucket name from the TARGET_BUCKET environment variable."""
        return os.getenv("TARGET_BUCKET")

    def upload(self, path: str) -> bool:
        """Upload the file at 'path' to the bucket under its file name.

        Returns:
            True when the upload completed, False when opening the file or
            the upload itself raised an error.
        """
        name = get_file(path)
        blob = self.bucket.blob(name)

        try:
            with open(path, "rb") as f:
                blob.upload_from_file(f)
        # Not a bare 'except': KeyboardInterrupt and SystemExit must still
        # be able to stop the program.
        except Exception:
            return False

        return True
|
2
src/db/__init__.py
Normal file
2
src/db/__init__.py
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
from .sqlite import dbname
|
||||||
|
from .database import Database
|
15
src/db/config.sql
Normal file
15
src/db/config.sql
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
/* Key/value flags; the values inserted below are 0 or 1.
   Only block comments are used in this file because the loader joins all
   lines into one, which would let a line comment swallow the statements
   that follow it. */
CREATE TABLE IF NOT EXISTS flags (
    k TEXT PRIMARY KEY,
    v INTEGER
);

/* One row per backed-up item: its path (anchor) and checksum */
CREATE TABLE IF NOT EXISTS manifest (
    anchor TEXT PRIMARY KEY,
    chksum INTEGER
);

/* String literals use single quotes; double quotes denote identifiers.
   OR IGNORE keeps existing flag values when the script is run again. */
INSERT OR IGNORE INTO flags (k, v)
VALUES
    ('COMPRESS', 1),
    ('BUCKET_OK', 0),
    ('INIT', 1);
|
53
src/db/database.py
Normal file
53
src/db/database.py
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
import os
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
|
from .sqlite import SQLite
|
||||||
|
|
||||||
|
class Database(SQLite):
    """Access layer for the 'manifest' table.

    An item is a sequence whose first element is the path (stored in the
    'anchor' column) and whose second element is the checksum (stored in
    the 'chksum' column).
    """

    def __init__(self):
        super().__init__()

        self._columns = ["anchor", "chksum"]

    @property
    def columns(self):
        """Column names joined into a comma-separated string."""
        return ",".join(self._columns)

    @columns.setter
    def columns(self, columns: list):
        self._columns = columns

    @staticmethod
    def _quote(value) -> str:
        """Return 'value' as a single-quoted SQL string literal.

        Embedded single quotes are doubled so that paths such as
        "Bob's files" cannot terminate the literal early.
        """
        return "'" + str(value).replace("'", "''") + "'"

    # Create SQL string CSV from list
    @staticmethod
    def str_csv(items: Union[list, tuple]) -> str:
        """Return the items as comma-separated, quoted SQL string literals."""
        return ",".join(Database._quote(value) for value in items)

    # Check if item exists in the database
    def item_exists(self, item: Union[list, tuple]) -> bool:
        """Return True when a manifest row exists for the item's path."""
        sql = f"SELECT anchor FROM manifest WHERE anchor = {Database._quote(item[0])}"

        return bool(self.query(sql))

    # Check if item should be backed up by comparing path and checksum
    def check_item(self, item: Union[list, tuple]) -> bool:
        """Return True when the item is new or differs from its stored row."""
        sql = f"SELECT {self.columns} FROM manifest WHERE anchor = {Database._quote(item[0])}"
        db_item = self.query(sql)

        # New item, so back it up
        if not db_item:
            return True

        # Rows come back as tuples; normalise both sides so that an item
        # passed as a list is not reported as changed on every run
        return tuple(item) != tuple(db_item[0])

    # Insert or update item in database
    def set_item(self, item: Union[list, tuple]) -> bool:
        """Store the item's checksum, creating the row when it is missing."""
        anchor = Database._quote(item[0])
        chksum = int(item[1])

        if self.item_exists(item):
            sql = f"UPDATE manifest SET chksum = {chksum} WHERE anchor = {anchor}"
        else:
            sql = f"INSERT INTO manifest ({self.columns}) VALUES ({anchor}, {chksum})"
        self.query(sql)

        return True
|
16
src/db/flags.py
Normal file
16
src/db/flags.py
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
from .sqlite import SQLite
|
||||||
|
|
||||||
|
class Flags(SQLite):
    """SQLite handle that carries the column names 'k' and 'v'."""

    def __init__(self):
        super().__init__()

        self._columns = ["k", "v"]

    @property
    def columns(self):
        """Column names joined into a comma-separated string."""
        separator = ","
        return separator.join(self._columns)

    @columns.setter
    def columns(self, columns: list):
        """Replace the list of column names."""
        self._columns = columns
|
||||||
|
|
69
src/db/sqlite.py
Normal file
69
src/db/sqlite.py
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
import sqlite3 as sqlite
|
||||||
|
|
||||||
|
dbname = "._cloudbackup.db"
|
||||||
|
|
||||||
|
class SQLite():
    """Thin wrapper around the sqlite3 database file kept in SOURCE_FOLDER."""

    # Environment values that switch a flag off when used as an override
    _FALSY = ("0", "false", "no", "off")

    def __init__(self):
        self.db = sqlite.connect(self.get_db_path())
        self.cursor = self.db.cursor()

        # Check if the database requires configuration
        try:
            db_exists = self.get_flag("INIT")
        except sqlite.OperationalError:
            # The query failed, e.g. because the flags table is missing
            db_exists = False

        if not db_exists:
            self.configure_db()

    # Strip linebreaks from pretty-printed SQL
    @staticmethod
    def format_query(sql: str) -> str:
        """Join all lines of 'sql' into one, trimming each line."""
        return " ".join([s.strip() for s in sql.splitlines()])

    # Run SQL query
    def query(self, sql: str, params: tuple = ()):
        """Execute one statement and commit.

        Args:
            sql: The statement, optionally containing '?' placeholders.
            params: Values bound to the placeholders.

        Returns:
            The list of result rows, or False when there are none.
        """
        query = self.cursor.execute(sql, params)
        self.db.commit()

        result = query.fetchall()
        if len(result) < 1:
            return False

        return result

    # Get path to database file
    def get_db_path(self) -> str:
        """Return the database path derived from SOURCE_FOLDER.

        Raises:
            ValueError: SOURCE_FOLDER is unset or empty.
        """
        path = os.getenv("SOURCE_FOLDER")
        if not path:
            raise ValueError("SOURCE_FOLDER is not set. Add it to your '.env' file")

        # Append db file name if absent
        if path.endswith(dbname):
            return path

        return os.path.join(path, dbname)

    # Prepare a fresh db with the expected table structure
    def configure_db(self):
        """Run config.sql, located next to this module, against the database."""
        config = pathlib.Path(__file__).parent.resolve() / "config.sql"

        # Context manager so the file handle is closed again
        with open(config, encoding="utf-8") as f:
            sql_str = SQLite.format_query(f.read())

        result = self.cursor.executescript(sql_str)
        self.db.commit()

        return result

    # Get value from flag by key or .env override
    def get_flag(self, key: str) -> bool:
        """Return the boolean value of a flag.

        A non-empty environment variable named 'key' overrides the
        database; "0", "false", "no" and "off" (any case) mean False and
        every other value means True. Without an override the stored value
        is used; an unknown key yields False.
        """
        # Return environment variable override
        envar = os.getenv(key)
        if envar:
            return envar.strip().lower() not in SQLite._FALSY

        res = self.query("SELECT v FROM flags WHERE k = ?", (key,))
        if not res:
            return False

        # Honour the stored value instead of the mere existence of the row
        return bool(res[0][0])
|
2
src/fs/__init__.py
Normal file
2
src/fs/__init__.py
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
from .utils import file_exists, get_parent, get_file
|
||||||
|
from .fs import FileSystem
|
58
src/fs/fs.py
Normal file
58
src/fs/fs.py
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
import os
|
||||||
|
import zlib
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
from ..db import dbname
|
||||||
|
from .utils import file_exists, get_parent, get_file
|
||||||
|
|
||||||
|
class FileSystem:
    """Helpers for listing, fingerprinting and archiving local items."""

    def __init__(self):
        self.path = FileSystem.get_path()

    @staticmethod
    def get_path() -> str:
        """Return the value of the SOURCE_FOLDER environment variable."""
        return os.getenv("SOURCE_FOLDER")

    # Calculate a CRC32 checksum of provided data
    @staticmethod
    def chksum(data: str) -> int:
        """Return the CRC32 of 'data' (UTF-8 encoded) as an integer."""
        return zlib.crc32(data.encode("utf-8"))

    @staticmethod
    def delete(path: str) -> bool:
        """Remove the file at 'path' and return True.

        Errors from os.remove (e.g. a missing file) are propagated.
        """
        os.remove(path)
        return True

    @staticmethod
    def zip(item) -> str:
        """Create a zip archive of the item in the system temp directory.

        Args:
            item: Sequence of (path, checksum); the checksum becomes the
                base name of the archive.

        Returns:
            The path of the archive as returned by shutil.make_archive.
        """
        dest = os.path.join(tempfile.gettempdir(), str(item[1]))

        # Make a temp zip file of single file or folder
        if file_exists(item[0]):
            # Archive only this file, relative to its parent directory
            return shutil.make_archive(dest, "zip", get_parent(item[0]), get_file(item[0]))
        return shutil.make_archive(dest, "zip", item[0])

    # Get metadata from candidate file or folder
    def get_item(self, path: str) -> tuple:
        """Return (path, checksum) for 'path', or False for ignored paths.

        The checksum is the CRC32 of the path concatenated with its
        modification time.
        """
        # Ignore SQLite temp files
        if path.endswith(".db-journal"):
            return False

        # NOTE(review): for a folder this is the mtime of the folder entry
        # itself; changes to files nested deeper may not update it - confirm
        # that this is acceptable for change detection.
        mtime = os.path.getmtime(path)
        chksum = FileSystem.chksum(path + str(mtime))

        return (path, chksum)

    # Get all files and folders directly inside path
    def all(self) -> list:
        """Return the get_item() result of every non-ignored entry in self.path."""
        items = []

        for name in os.listdir(self.path):
            data = self.get_item(os.path.join(self.path, name))
            if data:
                items.append(data)

        return items
|
15
src/fs/utils.py
Normal file
15
src/fs/utils.py
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
import os.path
|
||||||
|
import ntpath
|
||||||
|
|
||||||
|
# Check if a file exists
|
||||||
|
def file_exists(file: str) -> bool:
    """Return True when 'file' is the path of an existing regular file."""
    is_regular_file = os.path.isfile(file)
    return is_regular_file
|
||||||
|
|
||||||
|
# Get parent directory of file
|
||||||
|
def get_parent(path: str) -> str:
    """Return the directory part of 'path' (empty when it has none)."""
    parent, _ = os.path.split(path)
    return parent
|
||||||
|
|
||||||
|
# Get filename from path string
|
||||||
|
def get_file(path: str) -> str:
    """Return the last component of 'path'.

    Both "/" and "\\" are treated as separators, and a trailing separator
    is ignored.
    """
    directory, name = ntpath.split(path)
    if name:
        return name

    # The path ended with a separator: use the component before it
    return ntpath.basename(directory)
|
Loading…
Add table
Reference in a new issue