Mirror of https://codeberg.org/vlw/cloud-backup.git (synced 2025-09-13 17:43:42 +02:00)
pre-v1.0 (#1)
* wip(22w8a): add wip db and gcs client
* wip(22w8b): bootstrapping fix
* wip(22w8c): add first-run sql config
* wip(22w8d): add sqlite abstraction
* wip(22w8e): add filesystem handler
* wip(22w8f): add folder walker
* wip(22w8a): finish db writer
* wip(22w9a): add item zipper
* wip(22w9b): add gcs upload
* Create README.md

Co-authored-by: Cloud Shell <cloud-shell@victor-westerlund.iam.gserviceaccount.com>
This commit is contained in:
parent 9071d6d9fe
commit 247e6732bf
17 changed files with 468 additions and 0 deletions
.env.example (Normal file, 7 lines)
@@ -0,0 +1,7 @@
SOURCE_FOLDER=
TARGET_BUCKET=

# Cloud provider "gcs, aws, azure"
SERVICE_NAME=
# Path to service account key file
SERVICE_KEY=
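These variables are read at runtime through python-dotenv rather than hard-coded anywhere. A minimal sketch of how the code consumes them (mirroring `src/__init__.py` and the `os.getenv()` calls elsewhere in `src/`):

```python
# Sketch: how the .env.example variables are consumed at runtime.
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the working directory

source = os.getenv("SOURCE_FOLDER")   # local folder to back up
bucket = os.getenv("TARGET_BUCKET")   # destination bucket name
service = os.getenv("SERVICE_NAME")   # e.g. "gcs"; selects src/cloud/<service>.py
key = os.getenv("SERVICE_KEY")        # path to the service account key file
```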
.gitignore (Normal file, vendored, 52 lines)
@@ -0,0 +1,52 @@
# Bootstrapping #
#################
/node_modules
/public/hot
/public/storage
/storage/*.key
/vendor
.env
.env.backup
.phpunit.result.cache
Homestead.json
Homestead.yaml
npm-debug.log
yarn-error.log
public/robots.txt
__pycache__
*.pyc

# OS generated files #
######################
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
Icon?
ehthumbs.db
Thumbs.db
.directory

# Tool specific files #
#######################
# vim
*~
*.swp
*.swo
# sublime text & textmate
*.sublime-*
*.stTheme.cache
*.tmlanguage.cache
*.tmPreferences.cache
# Eclipse
.settings/*
# JetBrains, aka PHPStorm, IntelliJ IDEA
.idea/*
# NetBeans
nbproject/*
# Visual Studio Code
.vscode
.theia
# Sass preprocessor
.sass-cache/
README.md (Normal file, 43 lines)
@@ -0,0 +1,43 @@
# Cloud Backup

Back up and archive ordinary files and folders to Google Cloud, AWS or Azure.

## Get started

This program requires Python 3.6 or newer with pip.

1. **Clone this repo**
   ```
   git clone https://github.com/VictorWesterlund/cloud-backup
   ```

2. **Install dependencies**
   ```
   python3 -m pip install -r requirements.txt
   ```

3. **Copy the environment variables file**
   ```
   cp .env.example .env
   ```

4. **Edit environment variables**

   Open `.env` with your text editor of choice and fill out these required variables:
   ```bash
   # Path to the local folder to back up
   SOURCE_FOLDER=
   # Name of the remote bucket (destination)
   TARGET_BUCKET=

   # Cloud provider (gcs, s3, azure)
   SERVICE_NAME=
   # Path to service account key file
   SERVICE_KEY=
   ```

5. **Run the backup script**
   ```
   python3 backup.py
   ```

Second-level files and folders should now start uploading to your destination bucket as zip archives.
Subsequent runs of `backup.py` only upload files and folders that have changed.
In fact, modified state is cached locally, so a run makes no requests to your cloud provider unless something actually needs to be uploaded.
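The change detection described above is purely local: each item is reduced to a CRC32 checksum of its path plus modification time (`src/fs/fs.py`) and compared against a manifest kept in a hidden SQLite file inside `SOURCE_FOLDER` (`src/db/`). A minimal standalone sketch of that idea (not code from this repository; the manifest dict stands in for the SQLite table):

```python
# Standalone sketch of the local change-detection idea: an item is "changed"
# when the CRC32 of its path + mtime differs from the last cached checksum.
import os
import zlib

def checksum(path: str) -> int:
    # Same recipe as FileSystem.chksum(): CRC32 over path + mtime
    mtime = os.path.getmtime(path)
    return zlib.crc32((path + str(mtime)).encode("utf-8"))

manifest = {}  # stand-in for the SQLite "manifest" table (anchor -> chksum)

path = "README.md"  # any existing file or folder works for the demo
current = checksum(path)

if manifest.get(path) != current:
    # New or changed since the last run: upload it, then cache its state
    manifest[path] = current
```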
backup.py (Normal file, 5 lines)
@@ -0,0 +1,5 @@
import sys

from src import Backup

Backup().backup_all()
requirements.txt (Normal file, 2 lines)
@@ -0,0 +1,2 @@
python-dotenv
google-cloud-storage
src/__init__.py (Normal file, 9 lines)
@@ -0,0 +1,9 @@
from dotenv import load_dotenv
from .db import Database, dbname
from .fs import FileSystem, file_exists
from .backup import Backup

if not file_exists(".env"):
    raise FileNotFoundError("Environment variable file does not exist. Copy '.env.example' to '.env'")

load_dotenv()
src/backup.py (Normal file, 64 lines)
@@ -0,0 +1,64 @@
from typing import Union

from .cloud import Storage as StorageClient
from . import Database, FileSystem
from . import dbname

class Backup(FileSystem):
    def __init__(self):
        super().__init__()

        self.has_change = False

        self.db = Database()
        self.cloud = StorageClient()

        self.compress = self.db.get_flag("COMPRESS")

    # Back up a file or folder
    def backup_item(self, item: Union[list, str]) -> bool:
        if isinstance(item, str):
            item = self.get_item(item)

        # Check item against db if it has changed
        db_resp = self.db.check_item(item)
        if not db_resp:
            return

        # Back up changes to database in silence
        if item[0].endswith(dbname):
            self.db.set_item(item)
            return

        self.has_change = True

        print(f"Uploading: '{item[0]}' ... ", end="")

        # Upload the raw path by default; zip() takes the full (path, chksum) item
        blob = item[0]
        # Upload as zip archive
        if self.compress:
            blob = FileSystem.zip(item)

        # Upload to cloud
        if self.cloud.upload(blob):
            # Update local database
            if self.db.set_item(item):
                print("OK")
            else:
                print("OK, but failed to update database")
        else:
            print("FAILED")

        # Remove temp zip
        if self.compress:
            FileSystem.delete(blob)
        return

    # Scan SOURCE_FOLDER for files and folders to back up
    def backup_all(self):
        # Check all second-level files and folders at the source path
        for item in self.all():
            self.backup_item(item)

        if not self.has_change:
            print("Up to date. No changes found")
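Because `backup_item()` also accepts a plain path string (it is resolved through `get_item()`), a single file or folder can be backed up without scanning the whole source folder. A minimal sketch, assuming a configured `.env`; the path below is hypothetical:

```python
# Hypothetical usage sketch: back up one item instead of the whole source folder.
# Assumes .env is configured and the path exists under SOURCE_FOLDER.
from src import Backup

backup = Backup()
backup.backup_item("/backups/photos")  # hypothetical path; a str is resolved via get_item()
```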
src/cloud/__init__.py (Normal file, 30 lines)
@@ -0,0 +1,30 @@
import os
import importlib

# This class initializes only the module for the requested service.
# It sits as an intermediate between the initiator script and client library.
class Storage:
    def __init__(self):
        self._service = None
        self.service = os.getenv("SERVICE_NAME")

    @property
    def service(self):
        return self._service

    # Create a new storage client for the requested service
    @service.setter
    def service(self, service: str):
        if not service:
            service = "gcs"
        module = importlib.import_module("src.cloud." + service)

        self._service = module.StorageClient()

    @staticmethod
    def get_args(values):
        values.pop(-1)
        return values

    def upload(self, *argv):
        return self.service.upload(*argv)
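Because the setter resolves `src.cloud.<SERVICE_NAME>` dynamically, adding a provider only requires a module exposing a `StorageClient` class with an `upload(path) -> bool` method. A hypothetical `src/cloud/aws.py` (not part of this commit; it assumes `boto3` would be added to requirements.txt and that AWS credentials are available) could look like this sketch:

```python
# Hypothetical src/cloud/aws.py: an S3 backend that plugs into Storage by
# exposing the same StorageClient.upload(path) -> bool shape as the GCS client.
import os

import boto3

from ..fs.utils import get_file

class StorageClient:
    def __init__(self):
        self.bucket = os.getenv("TARGET_BUCKET")
        self.client = boto3.client("s3")

    def upload(self, path: str) -> bool:
        try:
            # Object name mirrors the GCS client: just the file name of the archive
            self.client.upload_file(path, self.bucket, get_file(path))
            return True
        except Exception:
            return False
```

With a module like that in place, setting `SERVICE_NAME=aws` in `.env` would route uploads through it instead of Google Cloud Storage.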
src/cloud/gcs.py (Normal file, 26 lines)
@@ -0,0 +1,26 @@
import os
from google.cloud import storage

from ..fs.utils import get_file

# Client for Google Cloud Storage
class StorageClient:
    def __init__(self):
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.getenv("SERVICE_KEY")

        client = storage.Client()
        self.bucket = client.bucket(self.get_bucket())

    def get_bucket(self):
        return os.getenv("TARGET_BUCKET")

    def upload(self, path: str) -> bool:
        name = get_file(path)
        blob = self.bucket.blob(name)

        try:
            with open(path, "rb") as f:
                blob.upload_from_file(f)
            return True
        except Exception:
            return False
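The client can be exercised on its own as a quick smoke test. A hedged sketch, assuming a configured `.env` (the `src` package checks for it on import) with a real `TARGET_BUCKET` and `SERVICE_KEY`; the archive path is hypothetical:

```python
# Hypothetical smoke test for the GCS client; requires a configured .env
# and an existing local file to upload.
from src.cloud.gcs import StorageClient

client = StorageClient()
print(client.upload("/tmp/123456789.zip"))  # hypothetical archive; prints True on success
```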
src/db/__init__.py (Normal file, 2 lines)
@@ -0,0 +1,2 @@
from .sqlite import dbname
from .database import Database
src/db/config.sql (Normal file, 15 lines)
@@ -0,0 +1,15 @@
CREATE TABLE flags (
    k TEXT PRIMARY KEY,
    v INTEGER
);

CREATE TABLE manifest (
    anchor TEXT PRIMARY KEY,
    chksum INTEGER
);

INSERT INTO flags
VALUES
    ("COMPRESS", 1),
    ("BUCKET_OK", 0),
    ("INIT", 1);
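These defaults are what `SQLite.get_flag()` reads on each run, unless an environment variable of the same name overrides them. A small sketch of how a freshly configured database answers a flag lookup, using only the standard library and a throwaway in-memory database so nothing in `SOURCE_FOLDER` is touched:

```python
# Sketch of a flag lookup against the schema above, using an in-memory SQLite database.
import sqlite3

db = sqlite3.connect(":memory:")
db.executescript("""
    CREATE TABLE flags (k TEXT PRIMARY KEY, v INTEGER);
    INSERT INTO flags VALUES ('COMPRESS', 1), ('BUCKET_OK', 0), ('INIT', 1);
""")

# Same shape of query that SQLite.get_flag() issues
row = db.execute("SELECT v FROM flags WHERE k = 'COMPRESS'").fetchone()
print(bool(row and row[0]))  # True: archives are compressed by default
```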
src/db/database.py (Normal file, 53 lines)
@@ -0,0 +1,53 @@
import os
from typing import Union

from .sqlite import SQLite

class Database(SQLite):
    def __init__(self):
        super().__init__()

        self._columns = ["anchor", "chksum"]

    @property
    def columns(self):
        return ",".join(self._columns)

    @columns.setter
    def columns(self, columns: list):
        self._columns = columns

    # Create SQL string CSV from list
    @staticmethod
    def str_csv(items: Union[list, tuple]) -> str:
        items = list(map(lambda value: f"'{str(value)}'", items))
        items = ",".join(items)

        return items

    # Check if item exists in the database
    def item_exists(self, item: Union[list, tuple]) -> bool:
        sql = f"SELECT anchor FROM manifest WHERE anchor = '{item[0]}'"
        res = self.query(sql)

        return res

    # Check if item should be backed up by comparing mtime and checksum
    def check_item(self, item: Union[list, tuple]) -> bool:
        sql = f"SELECT {self.columns} FROM manifest WHERE anchor = '{item[0]}'"
        db_item = self.query(sql)

        # New item or item changed, so back it up
        if not db_item or (item != db_item[0]):
            return True
        return False

    # Insert or update item in database
    def set_item(self, item: Union[list, tuple]) -> bool:
        sql = f"UPDATE manifest SET anchor = '{item[0]}', chksum = {item[1]} WHERE anchor = '{item[0]}'"

        if not self.item_exists(item):
            sql = f"INSERT INTO manifest ({self.columns}) VALUES ('{item[0]}', {item[1]})"
        self.query(sql)

        return True
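A round trip through the manifest can be sketched against a throwaway `SOURCE_FOLDER`, so the hidden `._cloudbackup.db` file lands in a temp directory. Assumes the script is run from the repository root with a `.env` present (the `src` package checks for it on import); the item tuple is hypothetical:

```python
# Sketch of the manifest round trip on a throwaway SOURCE_FOLDER.
import os
import tempfile

os.environ["SOURCE_FOLDER"] = tempfile.mkdtemp()  # set before load_dotenv() runs

from src.db import Database

db = Database()
item = ("/backups/photos", 123456789)  # hypothetical (anchor, chksum) tuple

print(db.check_item(item))  # True: unknown item, so it would be backed up
db.set_item(item)           # cache its state in the manifest
print(db.check_item(item))  # False: unchanged since the cached state
```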
src/db/flags.py (Normal file, 16 lines)
@@ -0,0 +1,16 @@
from .sqlite import SQLite

class Flags(SQLite):
    def __init__(self):
        super().__init__()

        self._columns = ["k", "v"]

    @property
    def columns(self):
        return ",".join(self._columns)

    @columns.setter
    def columns(self, columns: list):
        self._columns = columns
src/db/sqlite.py (Normal file, 69 lines)
@@ -0,0 +1,69 @@
import os
import pathlib
import sqlite3 as sqlite

dbname = "._cloudbackup.db"

class SQLite():
    def __init__(self):
        self.db = sqlite.connect(self.get_db_path())
        self.cursor = self.db.cursor()

        # Check if the database requires configuration
        try:
            db_exists = self.get_flag("INIT")
            if not db_exists:
                self.configure_db()
        except sqlite.OperationalError:
            self.configure_db()

    # Strip linebreaks from pretty-printed SQL
    @staticmethod
    def format_query(sql: str) -> str:
        return " ".join([s.strip() for s in sql.splitlines()])

    # Run SQL query
    def query(self, sql: str):
        query = self.cursor.execute(sql)
        self.db.commit()

        result = query.fetchall()
        if len(result) < 1:
            return False

        return result

    # Get path to database file
    def get_db_path(self) -> str:
        path = os.getenv("SOURCE_FOLDER")

        # Append db file name if absent
        if not path.endswith(dbname):
            # Append trailing slash if absent
            if path[-1] != "/":
                path += "/"
            path += dbname
        return path

    # Prepare a fresh db with the expected table structure
    def configure_db(self):
        cwd = str(pathlib.Path(__file__).parent.resolve())

        sql = open(cwd + "/config.sql")
        sql_str = SQLite.format_query(sql.read())

        return self.cursor.executescript(sql_str)

    # Get value from flag by key or .env override
    def get_flag(self, key: str) -> bool:
        # Return environment variable override
        envar = os.getenv(key)
        if envar:
            return envar

        sql = f"SELECT v FROM flags WHERE k = '{key}'"
        res = self.query(sql)

        if not res:
            return False
        # Return the stored integer as a boolean (a flag set to 0 reads as False)
        return bool(res[0][0])
src/fs/__init__.py (Normal file, 2 lines)
@@ -0,0 +1,2 @@
from .utils import file_exists, get_parent, get_file
from .fs import FileSystem
src/fs/fs.py (Normal file, 58 lines)
@@ -0,0 +1,58 @@
import os
import zlib
import shutil
import tempfile

from ..db import dbname
from .utils import file_exists, get_parent, get_file

class FileSystem:
    def __init__(self):
        self.path = FileSystem.get_path()

    @staticmethod
    def get_path() -> str:
        return os.getenv("SOURCE_FOLDER")

    # Calculate a CRC32 checksum of provided data
    @staticmethod
    def chksum(data: str) -> int:
        encoded = data.encode("utf-8")
        return zlib.crc32(encoded)

    @staticmethod
    def delete(path: str) -> bool:
        return os.remove(path)

    @staticmethod
    def zip(item) -> str:
        dest = f"{tempfile.gettempdir()}/{str(item[1])}"

        # Make a temp zip file of single file or folder
        if file_exists(item[0]):
            return shutil.make_archive(dest, "zip", get_parent(item[0]), get_file(item[0]))
        return shutil.make_archive(dest, "zip", item[0])

    # Get metadata from candidate file or folder
    def get_item(self, path: str) -> tuple:
        # Ignore SQLite temp files
        if path.endswith(".db-journal"):
            return False

        mtime = os.path.getmtime(path)
        chksum = FileSystem.chksum(path + str(mtime))

        data = (path, chksum)
        return data

    # Get all second-level files and folders for path
    def all(self) -> list:
        content = [os.path.join(self.path, f) for f in os.listdir(self.path)]
        items = []

        for item in content:
            data = self.get_item(item)
            if data:
                items.append(data)

        return items
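The item and archive helpers can be tried in isolation. A sketch using a throwaway folder, assuming a configured `.env` (the `src` package checks for it on import); it shows the `(path, chksum)` tuple that `get_item()` produces and the temp archive that `zip()` returns:

```python
# Sketch: build an item tuple for a throwaway folder and zip it into the
# system temp directory, so nothing under SOURCE_FOLDER is touched.
import pathlib
import tempfile

from src.fs import FileSystem

folder = tempfile.mkdtemp()
pathlib.Path(folder, "note.txt").write_text("hello")

fs = FileSystem()
item = fs.get_item(folder)      # (path, crc32 of path + mtime)
archive = FileSystem.zip(item)  # <tempdir>/<chksum>.zip containing the folder
print(item, archive)
FileSystem.delete(archive)      # clean up the temp archive
```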
src/fs/utils.py (Normal file, 15 lines)
@@ -0,0 +1,15 @@
import os.path
import ntpath

# Check if a file exists
def file_exists(file: str) -> bool:
    return os.path.isfile(file)

# Get parent directory of file
def get_parent(path: str) -> str:
    return os.path.dirname(path)

# Get filename from path string
def get_file(path: str) -> str:
    head, tail = ntpath.split(path)
    return tail or ntpath.basename(head)