Add XLSX import support: collection fields and migration script
scripts/import_collections_xlsx.py | 189 (new executable file)
@@ -0,0 +1,189 @@
#!/usr/bin/env python3
"""Parse a COLLECTIONS.xlsx workbook and POST its contents to the catalog import API."""

import argparse
import json
import re
import urllib.request
import xml.etree.ElementTree as ET
import zipfile

# OOXML namespaces used by the spreadsheet parts.
NS_MAIN = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
NS_REL = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
NS_PKG_REL = "http://schemas.openxmlformats.org/package/2006/relationships"

NS = {
    "m": NS_MAIN,
    "r": NS_REL,
    "pr": NS_PKG_REL,
}

# Sheet name (upper-cased) -> brand; sheets not listed here are skipped.
CONSOLE_TO_BRAND = {
    "NES": "NINTENDO",
    "SNES": "NINTENDO",
    "WII": "NINTENDO",
    "PS1": "SONY",
    "PS2": "SONY",
    "PS3": "SONY",
    "PS4": "SONY",
    "PS5": "SONY",
    "XBOX 360": "MICROSOFT",
}


def col_to_index(col: str) -> int:
    """Convert a column letter ("A", "B", ..., "AA") to a 1-based index."""
    idx = 0
    for ch in col:
        idx = idx * 26 + (ord(ch) - 64)
    return idx


def normalize_console(sheet_name: str) -> str:
    """Use the sheet name as the console name; only "WII" gets special casing."""
    normalized = sheet_name.strip().upper()
    if normalized == "WII":
        return "Wii"
    return sheet_name.strip()


def to_number(value):
    """Parse a numeric cell, accepting a comma as decimal separator; return None if empty or invalid."""
    if value is None:
        return None
    if isinstance(value, (int, float)):
        return float(value)
    text = str(value).strip().replace(",", ".")
    if text == "":
        return None
    try:
        return float(text)
    except ValueError:
        return None


def parse_xlsx(path: str):
    """Read the workbook using only the standard library (zipfile + ElementTree)."""
    brands = {}
    games_by_console = {}

    with zipfile.ZipFile(path) as zf:
        # Shared strings table: string cells store an index into this list.
        shared = []
        if "xl/sharedStrings.xml" in zf.namelist():
            shared_root = ET.fromstring(zf.read("xl/sharedStrings.xml"))
            for si in shared_root.findall("m:si", NS):
                text = "".join(t.text or "" for t in si.findall(".//m:t", NS))
                shared.append(text)

        # Resolve each sheet name to its worksheet XML part via the workbook relationships.
        wb = ET.fromstring(zf.read("xl/workbook.xml"))
        rels = ET.fromstring(zf.read("xl/_rels/workbook.xml.rels"))
        rel_map = {rel.attrib["Id"]: rel.attrib["Target"] for rel in rels.findall("pr:Relationship", NS)}

        sheets = []
        for sheet in wb.findall(".//m:sheets/m:sheet", NS):
            name = sheet.attrib["name"]
            rid = sheet.attrib[f"{{{NS_REL}}}id"]
            target = rel_map[rid]
            if not target.startswith("xl/"):
                target = "xl/" + target
            sheets.append((name, target))

        for sheet_name, target in sheets:
            console_name = normalize_console(sheet_name)
            brand = CONSOLE_TO_BRAND.get(sheet_name.strip().upper())
            if not brand:
                continue

            brands.setdefault(brand, [])
            if console_name not in brands[brand]:
                brands[brand].append(console_name)
            games_by_console.setdefault(console_name, [])

            root = ET.fromstring(zf.read(target))
            rows = root.findall(".//m:sheetData/m:row", NS)

            for row in rows:
                cells = row.findall("m:c", NS)
                # Columns A-F: title, version, duplicate flag, purchase price, value ("cote"), condition.
                values = {i: "" for i in range(1, 7)}

                for cell in cells:
                    ref = cell.attrib.get("r", "A1")
                    match = re.match(r"[A-Z]+", ref)
                    if not match:
                        continue
                    idx = col_to_index(match.group(0))
                    if idx > 6:
                        continue

                    cell_type = cell.attrib.get("t")
                    value_elem = cell.find("m:v", NS)
                    value = ""
                    if value_elem is not None and value_elem.text is not None:
                        if cell_type == "s":
                            # Shared-string cell: the stored value is an index into `shared`.
                            try:
                                value = shared[int(value_elem.text)]
                            except (ValueError, IndexError):
                                value = value_elem.text
                        else:
                            value = value_elem.text
                    values[idx] = value

                title = str(values[1]).strip()
                version = str(values[2]).strip()
                duplicate_raw = str(values[3]).strip()
                purchase_price = to_number(values[4])
                cote = to_number(values[5])
                condition = to_number(values[6])

                # Skip the header row (column B holds the literal "VERSION").
                if version.upper() == "VERSION":
                    continue

                if not title:
                    continue

                # Skip rows whose title column only holds a stray number and everything else is empty.
                if re.fullmatch(r"\d+(\.\d+)?", title) and not any([version, duplicate_raw, purchase_price, cote, condition]):
                    continue

                games_by_console[console_name].append(
                    {
                        "title": title,
                        "version": version,
                        # "OUI" (French for "yes") marks a duplicate copy.
                        "isDuplicate": duplicate_raw.upper() == "OUI",
                        "purchasePrice": purchase_price,
                        "value": cote,
                        "condition": condition,
                        "genre": "",
                        "publisher": "",
                        "year": None,
                        "loanedTo": "",
                    }
                )

    return {"brands": brands, "gamesByConsole": games_by_console}


def post_import(api_base: str, payload: dict):
    """POST the parsed payload to the API's bulk import endpoint."""
    req = urllib.request.Request(
        f"{api_base.rstrip('/')}/api/catalog/import",
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req) as resp:
        body = resp.read().decode("utf-8")
    return json.loads(body)


def main():
    parser = argparse.ArgumentParser(description="Import COLLECTIONS.xlsx to video game DB API")
    parser.add_argument("xlsx_path", help="Path to COLLECTIONS.xlsx")
    parser.add_argument("--api-base", default="http://127.0.0.1:7001", help="API base URL")
    args = parser.parse_args()

    payload = parse_xlsx(args.xlsx_path)
    result = post_import(args.api_base, payload)

    total_games = sum(len(v) for v in payload["gamesByConsole"].values())
    print(json.dumps({
        "sheetsImported": list(payload["gamesByConsole"].keys()),
        "parsedGames": total_games,
        "apiResult": result,
    }, ensure_ascii=True, indent=2))


if __name__ == "__main__":
    main()
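Usage, as implied by the argparse setup in main(): run python3 scripts/import_collections_xlsx.py COLLECTIONS.xlsx, optionally passing --api-base (default http://127.0.0.1:7001). The script prints a JSON summary listing the imported sheets, the number of parsed games, and the API response.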
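For reference, a minimal sketch of the JSON body that parse_xlsx() builds and post_import() sends to /api/catalog/import. Only the field names come from the script above; the SNES entry and its values are made-up placeholders, not data from the workbook.

import json

payload = {
    "brands": {"NINTENDO": ["SNES"]},       # brand -> list of console names
    "gamesByConsole": {
        "SNES": [
            {
                "title": "Example Title",   # column A
                "version": "PAL",           # column B
                "isDuplicate": False,       # column C, "OUI" -> True
                "purchasePrice": 25.0,      # column D
                "value": 40.0,              # column E ("cote")
                "condition": 8.0,           # column F
                "genre": "",                # not in the workbook; left empty
                "publisher": "",
                "year": None,
                "loanedTo": "",
            }
        ]
    },
}

print(json.dumps(payload, indent=2))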