From 64d2c653dca566b469393c5275a1f01c64b7fdda Mon Sep 17 00:00:00 2001 From: cjnaz Date: Sun, 2 Dec 2018 16:46:09 -0700 Subject: [PATCH] new --- README.md | 82 +++++++++++ xbsjsonedit | 387 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 469 insertions(+) create mode 100755 README.md create mode 100755 xbsjsonedit diff --git a/README.md b/README.md new file mode 100755 index 0000000..5459e0e --- /dev/null +++ b/README.md @@ -0,0 +1,82 @@ +# xbsjsonedit + +xbsjsonedit is a limited tool for modifying backup data dumped from the +[xBrowserSync App](https://www.xbrowsersync.org/) ([GitHub](https://github.com/xBrowserSync)). +It was originally developed for cleanup up bookmark replication problems in early releases +of the App. This tool may be useful for pruning duplicate bookmarks and general maintenance. +If you want it to do other functions, grab the code and go for it. + +## Usage +``` +$ ./xbsjsonedit -h +usage: xbsjsonedit [-h] Infile + +xBrowserSync json backup editor + +positional arguments: + Infile json backup file + +optional arguments: + -h, --help show this help message and exit +``` + +## Internal command menu +``` +$ ./xbsjsonedit xBrowserSyncBackup_201811232150.json + +--------------------------------------------------------------------- +Options: + s: Search term entry (currently <__NONE__>) + t: List search matches + T: Delete search matches + + d: List duplicate URLs + D: Delete duplicate URLs + + x: List tags on folders (only leaf nodes should have tags) + X: Delete tags on folders + + y: List empty folders (may arise if all children were deleted) + Y: Delete empty folders + + w: Write out the bookmarks data to a file + q: Quit/exit (Do a Write first!) + +Enter option: +``` +## Usage notes +Four modes are supported Listing/Deleting based on +- A search string +- Duplicate URLs in bookmarks - such as the same URL existing in two or more folders +- Folders that have tags - there is no value for folders having tags +- Empty folders - which may arise if all bookmarks were deleted from a folder + +A lower case letter `t/d/x/y` will list the offenders, while an upper case letter +`T/D/X/Y` will allow for selective deletes of the offenders. The lower case List operation +need not be run before running the upper case Delete operation. +Delete functions support individual bookmark selection, or all that match the selection mode. +You cannot do any damage to the original bookmark file during a session, so experiment and poke +around. + +When deleting empty folders you may find that you need to repeat the operation a few times. +This happens when there are folders within folders that contain no bookmarks. The lower folder +must be deleted before the next higher folder is seen as empty. Just repeat the folder deletes +until the Matches count is 0. + +The output file defaults to the Infile + "_OUT", and can be specified during the write operation. +You may want to write out intermediate editing results to save work done up to that point. +The output format is "pretty printed" json for readability, and is accepted by the +xBrowserSync app Restore function via copy/paste. Note: xBrowserSync will allow you to restore +to (blast) your current +syncID, so you may want to Disable Sync, then create a new sync before restoring the modified bookmarks. + + + +## Known issues: +- This code was developed on Python 2.7.10 on Windows, and exercised on Linux. It is known broken +on Python 3. +- The xBrowserSync Backup function appears to dump out Latin-1 encoding, not UTF-8. The +Write command from this tool outputs UTF-8, which seems to Restore without error in xBrowserSync, +but effectively garbages up the bookmark title text for non-ASCII bookmarks. + + diff --git a/xbsjsonedit b/xbsjsonedit new file mode 100755 index 0000000..d92c0a6 --- /dev/null +++ b/xbsjsonedit @@ -0,0 +1,387 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +#========================================================== +# +desc = """xBrowserSync json backup editor""" +# Chris Nelson, November 2018 +# +# 181128 New +# +# Issues, and changes pending +# This code was developed on Python 2.7.10 on Windows, and exercised on Linux. +# It is known broken on Python 3. +# The xBrowserSync Backup function appears to dump out Latin-1 encoding, not UTF-8. The Write command +# from this tool outputs UTF-8, which seems to Restore without error in xBrowserSync, but effectively +# garbages up the bookmark text for non-ASCII bookmarks. +# +#========================================================== + +import argparse +import os.path +import json +import codecs +import sys + + +# Configs / Constants +INDENT = 2 +OFILESUFFIX = "_OUT" +NONE_SEARCH = "__NONE__" + + +# Global items +url_dict = {} + + +def main(): + global match_cnt + global change_cnt + global do_all + global json_dict + global url_dict + + # Using latin-1 encoding so that the raw bytes are accepted as they are. +## with codecs.open(args.Infile, encoding="latin-1") as json_data: +## json_dict = json.load(json_data) + with open(args.Infile) as json_data: + json_dict = json.load(json_data, encoding="latin-1") + + term = NONE_SEARCH + + + while (1): + match_cnt = 0 + change_cnt = 0 + do_all = False + + print (""" +--------------------------------------------------------------------- +Options: + s: Search term entry (currently <{}>) + t: List search matches + T: Delete search matches + + d: List duplicate URLs + D: Delete duplicate URLs + + x: List tags on folders (only leaf nodes should have tags) + X: Delete tags on folders + + y: List empty folders (may arise if all children were deleted) + Y: Delete empty folders + + w: Write out the bookmarks data to a file + q: Quit/exit (Do a Write first!) +""".format(term)) + + select = prompt("Enter option: ") + + if select == 's': + term = prompt("Search for (empty to clear search term): ").lower() + if term == "": + term = NONE_SEARCH + elif select == 't': + digin(parent=json_dict["xBrowserSync"]["bookmarks"], parent_id="", path="/", search_term=term, operation="search") + print ("\nMatches: {}\n".format(match_cnt)) + elif select == 'T': + digin(parent=json_dict["xBrowserSync"]["bookmarks"], parent_id="", path="/", search_term=term, operation="search", commit=True) + delete_items () # Do the queued deletes + print ("\nMatches: {} Deletes: {}\n".format(match_cnt, change_cnt)) + + + elif select == 'd': + digin(parent=json_dict["xBrowserSync"]["bookmarks"], parent_id="", path="/", search_term="", operation="DupURLs") + dup_urls() + url_dict = {} + print ("\nMatches: {}\n".format(match_cnt)) + elif select == 'D': + digin(parent=json_dict["xBrowserSync"]["bookmarks"], parent_id="", path="/", search_term="", operation="DupURLs") + dup_urls(commit=True) + url_dict = {} + delete_items () + print ("\nMatches: {} Deletes: {}\n".format(match_cnt, change_cnt)) + + + elif select == 'x': + digin(parent=json_dict["xBrowserSync"]["bookmarks"], parent_id="", path="/", search_term="", operation="FolderTags") + print ("\nMatches: {}\n".format(match_cnt)) + elif select == 'X': + digin(parent=json_dict["xBrowserSync"]["bookmarks"], parent_id="", path="/", search_term="", operation="FolderTags", commit=True) + print ("\nMatches: {} Deletes: {}\n".format(match_cnt, change_cnt)) + + + elif select == 'y': + digin(parent=json_dict["xBrowserSync"]["bookmarks"], parent_id="", path="/", search_term="", operation="EmptyFolders") + print ("\nMatches: {}\n".format(match_cnt)) + elif select == 'Y': + digin(parent=json_dict["xBrowserSync"]["bookmarks"], parent_id="", path="/", search_term="", operation="EmptyFolders", commit=True) + delete_items () + print ("\nMatches: {} Deletes: {}\n".format(match_cnt, change_cnt)) + + + elif select == 'w': + ans = prompt("Output file name (default <{}>: ".format(args.Infile + OFILESUFFIX)) + if ans == "": + ans = args.Infile + OFILESUFFIX + with open(ans, 'w') as ofile: + json.dump(json_dict, ofile, indent=2) #, encoding="latin-1") + elif select == 'q': + exit() + + else: + print ("Invalid option {}".format(select)) + + + +def digin(parent, parent_id, path, search_term, operation, commit=False): + """Recurse through the json dictionary bookmark tree, with operation options.""" + global match_cnt + global change_cnt + global do_all + + item_index = -1 + ans = 'n' + item_list = [] + + for item in parent: + item_index += 1 + + if "children" in item: # ***** Its a folder ***** + + if operation == "FolderTags": + if "tags" in item: + print ("\n{:4} - {} >>> {}\n {}".format( + item["id"], + path[3:], + item["title"].encode('latin-1'), + item["tags"])) + if "url" in item: + if item["url"] == None: + print (" url = None !!!") + else: + print (" url: <{}>".format(item["url"])) + match_cnt += 1 + if commit: + if do_all: + ans = 'y' + else: + ans = prompt("Confirm immediate delete of tags on this folder ('y'es, 'a'll, or 'q'uit, default no) ").lower() + if ans == 'a': + do_all = True + ans = 'y' + if ans == 'y': + del item["tags"] + change_cnt += 1 + if ans == 'q': + return -1 + + if operation == "EmptyFolders": + if len(item["children"]) == 0: + print ("\n{:4} - {} >>> {}".format( + item["id"], + path, #[3:], + item["title"].encode('latin-1'))) + match_cnt += 1 + if commit: + if do_all: + ans = 'y' + else: + ans = prompt("Confirm delete of this empty folder ('y'es, 'a'll, or 'q'uit, default no) ").lower() + if ans == 'a': + do_all = True + ans = 'y' + if ans == 'y': + collect_items (path + parent_id, parent, item_index) + change_cnt += 1 + if ans == 'q': + if prompt("Discard pending deletes ('y'es, default no)? ").lower() == 'y': + path_dict = {} + return -1 + + if digin ( # Recurse down to the next level + parent = item["children"], + parent_id = str(item["id"]).encode("utf-8").decode("utf-8"), # WTF? https://stackoverflow.com/questions/17627834/convert-an-int-value-to-unicode + path = path + " > " + item["title"], + search_term = search_term, + operation = operation, + commit = commit) == -1: + return -1 # recursion 'q'uit switch + + + else: # ***** Its a Leaf node ***** + if operation == "DupURLs": + log_urls (item["url"], path, item["id"], parent, parent_id, item_index) + + if operation == "search": + match = False + if search_term in item["url"].lower() or search_term in item["title"].lower(): + match = True + if "tags" in item: + if item["tags"] != None: + for tag in item["tags"]: + if search_term in tag: + match = True + + if match: + match_cnt += 1 + print ("\n{:4} - {} >>> {}\n url: <{}>".format( + item["id"], + path[3:], + item["title"].encode('latin-1'), + item["url"])) + if "tags" in item: + if item["tags"] == None: + print (" tags = None !!!") + else: + print (" tags: <{}>".format(item["tags"])) + + if commit: + if do_all: + ans = 'y' + else: + ans = prompt("Confirm delete for this item ('y'es, 'a'll, or 'q'uit, default no) ").lower() + if ans == 'a': + do_all = True + ans = 'y' + if ans == 'y': + collect_items (path + parent_id, parent, item_index) + change_cnt += 1 + if ans == 'q': + if prompt("Discard pending deletes ('y'es, default no)? ").lower() == 'y': + path_dict = {} + return -1 + + +path_dict = {} +def collect_items (path, parent, index): + """Collect items for later deletion by delete_items.""" + global path_dict +## print path +## print parent +## print index + if path not in path_dict: + path_dict[path] = {"parent":parent, "index":[int(index)]} + else: + path_dict[path]["index"].append(int(index)) + + +def delete_items (): + """Delete queued-up items by collect_items. + Note: Items in lists that are to be deleted must be deleted from the highest + index / item number to the lowest, else the index numbers will be wrong, resulting + in garbage. This is the purpose of the collect_items and delete_items functions. + """ + global path_dict + + for path in path_dict: + xxx = path_dict[path]["index"] +## print path +## print xxx + xxx.sort(reverse=True) + for _index in xxx: +## print path_dict[path]["parent"] + del path_dict[path]["parent"][_index] + path_dict = {} + + +def log_urls (url, path, _id, parent, parent_id, item_index): + """Capture a list of all instances of each URL.""" + path_pid = path + " [" + parent_id + "]" + if url not in url_dict: + url_dict[url] = {"instance": [{"id":_id, "path":path_pid, "parent":parent, "item_index":item_index, "parent_id":parent_id}]} + else: + url_dict[url]["instance"].append({"id":_id, "path":path_pid, "parent":parent, "item_index":item_index, "parent_id":parent_id}) + + +def dup_urls (commit=False): + """List out, and optionally delete, bookmarks with the same URL.""" + global match_cnt + global change_cnt + global path_dict + for url in url_dict: + if len(url_dict[url]["instance"]) > 1: + print ("-------------------------------------------------\n{}".format(url)) + for instance in url_dict[url]["instance"]: + print (" {:>4} - {}".format(instance["id"], instance["path"])) + match_cnt += 1 + if commit: + print ("") + for instance in url_dict[url]["instance"]: + print (" {:>4} - {}".format(instance["id"], instance["path"])) + ans = prompt ("Confirm delete for this item ('y'es, 'q'uit, 's'kip to next URL - default no) ").lower() + if ans == 'y': + collect_items (instance["path"], instance["parent"], instance["item_index"]) + change_cnt += 1 + if ans == 's': + break + if ans == 'q': + if prompt("Discard pending deletes ('y'es, default no)? ").lower() == 'y': + path_dict = {} + return -1 + + +def prompt (prompt_text): + if sys.version_info[0] < 3: + return (raw_input(prompt_text)) + else: + return (input(prompt_text)) + + + +def get_item (path): + """Lookup leaf node data given hierarchical path info. NOT USED, BUT POTENTIALLY INTERESTING. +Example passed-in path list structure: + [ "xBrowserSync", "bookmarks", 2, "ATV", 7, "Honda parts", 4 ] + A named item translates to "children" in the json_dict hierarchy. +Corresponds to this path in json_dict: + json_dict["xBrowserSync"]["bookmarks"][2]["children"][7]["children"][4] + +Returns dictionary: + {"title" : "", + "url" : "<url text>", + "id" : id (int) + "tags" : [ "<tags>", "<list"> ], + "text_path: "<string concat of titles and list indexes>" + } +""" + global json_dict + + JOINTEXT = " > " + + xxx = json_dict["xBrowserSync"]["bookmarks"] + text_full_path = "" + + for item in path: + print (item) + if type(item) is str: # Its the title of a folder (child) level + xxx = xxx["children"] + else: # Its a list index (integer) + text_full_path += JOINTEXT + xxx[item]["title"] + xxx = xxx[item] + + tags = "" + if "tags" in xxx: + tags = xxx["tags"] + + return { + "title" : xxx["title"], + "url" : xxx["url"], + "id" : xxx["id"], + "tags" : tags, + "text_full_path": text_full_path + } + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=desc, formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument('Infile', + help="json backup file") + args = parser.parse_args() + + if not os.path.exists(args.Infile): + print ("Can't find the input file <{}>".format(args.Infile)) + exit() + + + main()