pre-v1.0 (#1)

* wip(22w8a): add wip db and gcs client

* wip(22w8b): bootstrapping fix

* wip(22w8c): add first-run sql config

* wip(22w8d): add sqlite abstraction

* wip(22w8e): add filesystem handler

* wip(22w8f): add folder walker

* wip(22w8a): finish db writer

* wip(22w9a): add item zipper

* wip(22w9b): add gcs upload

* Create README.md

Co-authored-by: Cloud Shell <cloud-shell@victor-westerlund.iam.gserviceaccount.com>
This commit is contained in:
Victor Westerlund 2022-03-02 04:06:13 +01:00 committed by GitHub
parent 9071d6d9fe
commit 247e6732bf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 468 additions and 0 deletions

7
.env.example Normal file
View file

@ -0,0 +1,7 @@
SOURCE_FOLDER=
TARGET_BUCKET=
# Cloud provider (gcs, aws, azure)
SERVICE_NAME=
# Path to service account key file
SERVICE_KEY=

52
.gitignore vendored Normal file
View file

@ -0,0 +1,52 @@
# Bootstrapping #
#################
/node_modules
/public/hot
/public/storage
/storage/*.key
/vendor
.env
.env.backup
.phpunit.result.cache
Homestead.json
Homestead.yaml
npm-debug.log
yarn-error.log
public/robots.txt
__pycache__
*.pyc
# OS generated files #
######################
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
Icon?
ehthumbs.db
Thumbs.db
.directory
# Tool specific files #
#######################
# vim
*~
*.swp
*.swo
# sublime text & textmate
*.sublime-*
*.stTheme.cache
*.tmlanguage.cache
*.tmPreferences.cache
# Eclipse
.settings/*
# JetBrains, aka PHPStorm, IntelliJ IDEA
.idea/*
# NetBeans
nbproject/*
# Visual Studio Code
.vscode
.theia
# Sass preprocessor
.sass-cache/

43
README.md Normal file
View file

@ -0,0 +1,43 @@
# Cloud Backup
Backup and archive ordinary files and folders to Google Cloud, AWS or Azure.
## Get started
This program requires Python 3.6 or newer with PIP.
1. **Clone this repo**
```
git clone https://github.com/VictorWesterlund/cloud-backup
```
2. **Install dependencies**
```
python3 -m pip install -r requirements.txt
```
3. **Copy environment variables file**
```
cp .env.example .env
```
4. **Edit environment variables**
Open `.env` with your text editor of choice and fill out these required variables
```bash
# Path to the local folder to back up
SOURCE_FOLDER=
# Name of the remote bucket (destination)
TARGET_BUCKET=
# Cloud provider (gcs, aws, azure)
SERVICE_NAME=
# Path to service account key file
SERVICE_KEY=
```
5. **Run backup script**
```
python3 backup.py
```
Second-level files and folders should now start uploading to your destination bucket as zip archives.
Subsequent runs of the `backup.py` script will only upload changed files and folders.
In fact, modification state is cached locally, so subsequent runs don't request anything from your cloud provider.

5
backup.py Normal file
View file

@ -0,0 +1,5 @@
"""Entry point: back up the configured source folder to the cloud."""
import sys

from src import Backup

# Guard so importing this module does not trigger a backup run
if __name__ == "__main__":
    Backup().backup_all()

2
requirements.txt Normal file
View file

@ -0,0 +1,2 @@
python-dotenv
google-cloud-storage

9
src/__init__.py Normal file
View file

@ -0,0 +1,9 @@
# Package bootstrap: verify configuration exists and load it into the environment.
from dotenv import load_dotenv
from .db import Database, dbname
from .fs import FileSystem, file_exists
from .backup import Backup

# Fail fast when .env is missing — every class below reads env vars at runtime
if not file_exists(".env"):
    raise FileNotFoundError("Environment variable file does not exist. Copy '.env.example' to '.env'")

# Populate os.environ from .env (safe after the imports above: the modules
# only read env vars when their classes are instantiated, not at import time)
load_dotenv()

64
src/backup.py Normal file
View file

@ -0,0 +1,64 @@
from typing import Union
from .cloud import Storage as StorageClient
from . import Database, FileSystem
from . import dbname
class Backup(FileSystem):
    """Drive a backup run: diff local items against the manifest DB and upload changes."""

    def __init__(self):
        super().__init__()
        # Set to True once at least one item has been uploaded this run
        self.has_change = False
        self.db = Database()
        self.cloud = StorageClient()
        # When truthy, items are zipped before upload (flag or .env override)
        self.compress = self.db.get_flag("COMPRESS")

    # Backup a file or folder
    def backup_item(self, item: Union[list, str]) -> bool:
        """Upload a single item if it changed since the last run.

        Accepts either a raw path string or a (path, chksum) item tuple.
        Returns True when an upload succeeded, False otherwise.
        """
        if isinstance(item, str):
            item = self.get_item(item)
        # BUG FIX: get_item() returns a falsy value for ignored files
        # (SQLite temp files); the original crashed on item[0] below.
        if not item:
            return False
        # Skip items the manifest already knows with this checksum
        if not self.db.check_item(item):
            return False
        # Back up changes to the database itself silently
        if item[0].endswith(dbname):
            self.db.set_item(item)
            return False
        self.has_change = True
        print(f"Uploading: '{item[0]}' ... ", end="")
        # BUG FIX: upload() expects a path string; the original passed the
        # whole (path, chksum) tuple through when compression was disabled.
        blob = FileSystem.zip(item) if self.compress else item[0]
        uploaded = self.cloud.upload(blob)
        if uploaded:
            # Record the new checksum in the local database
            if self.db.set_item(item):
                print("OK")
            else:
                print("OK, but failed to update database")
        else:
            print("FAILED")
        # Remove the temporary zip archive
        if self.compress:
            FileSystem.delete(blob)
        return uploaded

    # Scan SOURCE_FOLDER for files and folders to back up
    def backup_all(self):
        """Back up every second-level file and folder under the source path."""
        for item in self.all():
            self.backup_item(item)
        if not self.has_change:
            print("Up to date. No changes found")

30
src/cloud/__init__.py Normal file
View file

@ -0,0 +1,30 @@
import os
import importlib
# This class initializes only the module for the requested service.
# It sits as an intermediate between the initiator script and client library.
# This class initializes only the module for the requested service.
# It sits as an intermediate between the initiator script and client library.
class Storage:
    """Facade that lazily loads the storage client named by SERVICE_NAME."""

    def __init__(self):
        self._service = None
        self.service = os.getenv("SERVICE_NAME")

    @property
    def service(self):
        """The active provider client instance."""
        return self._service

    # Create a new storage client for the requested service
    @service.setter
    def service(self, service: str):
        # Fall back to Google Cloud Storage when no provider is configured
        provider = service or "gcs"
        module = importlib.import_module("src.cloud." + provider)
        self._service = module.StorageClient()

    @staticmethod
    def get_args(values):
        # Drop the trailing element (mutates the caller's list, as before)
        del values[-1]
        return values

    def upload(self, *argv):
        """Delegate an upload to the active provider client."""
        return self.service.upload(*argv)

26
src/cloud/gcs.py Normal file
View file

@ -0,0 +1,26 @@
import os
from google.cloud import storage
from ..fs.utils import get_file
# Client for Google Cloud Storage
class StorageClient:
def __init__(self):
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.getenv("SERVICE_KEY")
client = storage.Client()
self.bucket = client.bucket(self.get_bucket())
def get_bucket(self):
return os.getenv("TARGET_BUCKET")
def upload(self, path: str) -> bool:
name = get_file(path)
blob = self.bucket.blob(name)
try:
with open(path, "rb") as f:
blob.upload_from_file(f)
return True
except:
return False

2
src/db/__init__.py Normal file
View file

@ -0,0 +1,2 @@
from .sqlite import dbname
from .database import Database

15
src/db/config.sql Normal file
View file

@ -0,0 +1,15 @@
/* Key/value feature flags for the backup tool */
CREATE TABLE flags (
    k TEXT PRIMARY KEY,
    v INTEGER
);
/* One row per backed-up item: path anchor plus CRC32 checksum */
CREATE TABLE manifest (
    anchor TEXT PRIMARY KEY,
    chksum INTEGER
);
/* Defaults: compress uploads, bucket unverified, schema initialized.
   Single quotes: double-quoted string literals are a deprecated SQLite
   misfeature and are treated as identifiers when a matching column exists. */
INSERT INTO flags
VALUES
    ('COMPRESS', 1),
    ('BUCKET_OK', 0),
    ('INIT', 1);

53
src/db/database.py Normal file
View file

@ -0,0 +1,53 @@
import os
from typing import Union
from .sqlite import SQLite
class Database(SQLite):
    """Manifest table access: tracks one (anchor, chksum) row per backed-up item."""

    def __init__(self):
        super().__init__()
        self._columns = ["anchor", "chksum"]

    @property
    def columns(self):
        """Manifest column names as an SQL-ready CSV string."""
        return ",".join(self._columns)

    @columns.setter
    def columns(self, columns: list):
        self._columns = columns

    # Escape a value for embedding inside a single-quoted SQL literal
    @staticmethod
    def _sql_str(value) -> str:
        # BUG FIX: paths containing an apostrophe broke the f-string SQL below
        return str(value).replace("'", "''")

    # Create SQL string CSV from list
    @staticmethod
    def str_csv(items: Union[list, tuple]) -> str:
        return ",".join(f"'{str(value)}'" for value in items)

    # Check if item exists in the database
    def item_exists(self, item: Union[list, tuple]) -> bool:
        anchor = Database._sql_str(item[0])
        sql = f"SELECT anchor FROM manifest WHERE anchor = '{anchor}'"
        # BUG FIX: honor the -> bool annotation (original returned rows or False)
        return bool(self.query(sql))

    # Check if item should be backed up by comparing stored path and checksum
    def check_item(self, item: Union[list, tuple]) -> bool:
        anchor = Database._sql_str(item[0])
        sql = f"SELECT {self.columns} FROM manifest WHERE anchor = '{anchor}'"
        db_item = self.query(sql)
        # New item, or the stored row differs from the current (path, chksum).
        # tuple() normalizes list input, which would never equal a row tuple.
        if not db_item or tuple(item) != db_item[0]:
            return True
        return False

    # Insert or update item in database
    def set_item(self, item: Union[list, tuple]) -> bool:
        anchor = Database._sql_str(item[0])
        if self.item_exists(item):
            # Updating the anchor to itself was redundant; only chksum changes
            sql = f"UPDATE manifest SET chksum = {item[1]} WHERE anchor = '{anchor}'"
        else:
            sql = f"INSERT INTO manifest ({self.columns}) VALUES ('{anchor}', {item[1]})"
        self.query(sql)
        return True

16
src/db/flags.py Normal file
View file

@ -0,0 +1,16 @@
from .sqlite import SQLite
class Flags(SQLite):
    """Access to the key/value `flags` table."""

    def __init__(self):
        super().__init__()
        self._columns = ["k", "v"]

    @property
    def columns(self):
        """Flag column names joined into an SQL-ready CSV string."""
        csv = ",".join(self._columns)
        return csv

    @columns.setter
    def columns(self, columns: list):
        self._columns = columns

69
src/db/sqlite.py Normal file
View file

@ -0,0 +1,69 @@
import os
import pathlib
import sqlite3 as sqlite
# File name of the local manifest database, stored inside SOURCE_FOLDER
dbname = "._cloudbackup.db"


class SQLite():
    """Low-level SQLite access: connection, first-run schema setup, raw queries."""

    def __init__(self):
        self.db = sqlite.connect(self.get_db_path())
        self.cursor = self.db.cursor()
        # Bootstrap the schema when the flags table is missing or INIT is unset
        try:
            if not self.get_flag("INIT"):
                self.configure_db()
        except sqlite.OperationalError:
            # Fresh database file: the flags table does not exist yet
            self.configure_db()

    # Strip linebreaks from pretty-printed SQL
    @staticmethod
    def format_query(sql: str) -> str:
        return " ".join(line.strip() for line in sql.splitlines())

    # Run SQL query
    def query(self, sql: str):
        """Execute *sql*; return all rows, or False when the result is empty."""
        query = self.cursor.execute(sql)
        self.db.commit()
        result = query.fetchall()
        return result if result else False

    # Get path to database file
    def get_db_path(self) -> str:
        """Path to the db file inside SOURCE_FOLDER (appends the name if absent)."""
        path = os.getenv("SOURCE_FOLDER")
        # BUG FIX: original crashed with TypeError/IndexError on unset or
        # empty SOURCE_FOLDER; fail with an explicit message instead.
        if not path:
            raise EnvironmentError("SOURCE_FOLDER is not set")
        if not path.endswith(dbname):
            # Append trailing slash if absent
            if not path.endswith("/"):
                path += "/"
            path += dbname
        return path

    # Prepare a fresh db with the expected table structure
    def configure_db(self):
        cwd = pathlib.Path(__file__).parent.resolve()
        # BUG FIX: the original leaked the open file handle for config.sql
        with open(cwd / "config.sql") as sql_file:
            sql_str = SQLite.format_query(sql_file.read())
        return self.cursor.executescript(sql_str)

    # Get value from flag by key or .env override
    def get_flag(self, key: str) -> bool:
        # Environment variable overrides the stored flag. NOTE(review): any
        # non-empty string (including "0") counts as set, as before.
        envar = os.getenv(key)
        if envar:
            return envar
        sql = f"SELECT v FROM flags WHERE k = '{key}'"
        res = self.query(sql)
        if not res:
            return False
        # BUG FIX: use the stored value, not mere row existence — the
        # original returned True even for a flag stored as 0 (e.g. BUCKET_OK).
        return bool(res[0][0])

2
src/fs/__init__.py Normal file
View file

@ -0,0 +1,2 @@
from .utils import file_exists, get_parent, get_file
from .fs import FileSystem

58
src/fs/fs.py Normal file
View file

@ -0,0 +1,58 @@
import os
import zlib
import shutil
import tempfile
from ..db import dbname
from .utils import file_exists, get_parent, get_file
class FileSystem:
    """Local filesystem helpers: listing, checksumming and zipping backup items."""

    def __init__(self):
        self.path = FileSystem.get_path()

    @staticmethod
    def get_path() -> str:
        """Root folder to back up, from the SOURCE_FOLDER env var (may be None)."""
        return os.getenv("SOURCE_FOLDER")

    # Calculate a CRC32 checksum of provided data
    @staticmethod
    def chksum(data: str) -> int:
        # Annotation fix: zlib.crc32 returns an int, not a str
        encoded = data.encode("utf-8")
        return zlib.crc32(encoded)

    @staticmethod
    def delete(path: str) -> None:
        """Remove a file. Annotation fix: os.remove returns None, not bool."""
        return os.remove(path)

    @staticmethod
    def zip(item) -> str:
        """Zip a file or folder into the temp dir, named by its checksum.

        Returns the path of the created archive.
        """
        dest = f"{tempfile.gettempdir()}/{str(item[1])}"
        # Make a temp zip file of single file or folder
        if file_exists(item[0]):
            # Single file: archive just that file from its parent directory
            return shutil.make_archive(dest, "zip", get_parent(item[0]), get_file(item[0]))
        # Folder: archive the whole directory tree
        return shutil.make_archive(dest, "zip", item[0])

    # Get metadata from candidate file or folder
    def get_item(self, path: str):
        """Return a (path, chksum) tuple for *path*, or False for ignored files.

        The checksum covers path + mtime, so touching an item marks it changed.
        (Annotation removed: the original promised `-> tuple` but returns False
        for SQLite temp files.)
        """
        if path.endswith(".db-journal"):
            return False
        mtime = os.path.getmtime(path)
        chksum = FileSystem.chksum(path + str(mtime))
        return (path, chksum)

    # Get all second-level files and folders for path
    def all(self) -> list:
        """List (path, chksum) tuples for every direct child of the source folder."""
        items = []
        for entry in os.listdir(self.path):
            data = self.get_item(os.path.join(self.path, entry))
            if data:
                items.append(data)
        return items

15
src/fs/utils.py Normal file
View file

@ -0,0 +1,15 @@
import os.path
import ntpath
# Check if a file exists
def file_exists(file: str) -> bool:
    """Return True when *file* names an existing regular file."""
    exists = os.path.isfile(file)
    return exists
# Get parent directory of file
def get_parent(path: str) -> str:
    """Return the directory portion of *path* ('' when there is none)."""
    parent = os.path.dirname(path)
    return parent
# Get filename from path string
def get_file(path: str) -> str:
    """Return the final path component, tolerating a trailing slash."""
    head, tail = ntpath.split(path)
    if tail:
        return tail
    # Trailing slash: fall back to the last component of the parent
    return ntpath.basename(head)