Files
xbsjsonedit/xbsjsonedit
T
2020-06-08 13:07:24 -07:00

703 lines
29 KiB
Python
Executable File

#!/usr/bin/env python3
"""xBrowserSync json backup editor
Usages:
Command line modes:
xbsjsonedit news prints bookmarks tagged with 'news'
xbsjsonedit news --bookmarks prints bookmarks tagged with 'news' and any bookmarks or URLs containing 'news'
xbsjsonedit --print prints all bookmarks with their hierarchy
xbsjsonedit --tags 5 prints only tag names that are used at least 5 times
xbsjsonedit --tags 5 --names prints tag names and their bookmarks that are used at least 5 times
xbsjsonedit --tags 0 --names prints tag names and their bookmarks that are used exactly 1 time
xbsjsonedit --html <outfile.html> writes tagged bookmarks to the specified browsable file
Interactive mode:
xbsjsonedit Enters interactive mode
The path to the xbs_backup*.json file defaults to the newest in the download directory. --jsonbackup overrides the default.
"""
__version__ = "v1.1 200608"
#==========================================================
#
# Chris Nelson 2018 - 2020
#
# 200608 v1.1 Added cli search and html features, defaulted .json file to be the newest in the download directory.
# 200330 v1.0 Merged pull request #3 from mblais (add unbuffered getch input; add 'all' option), Reworked --tags and --names switches.
# 191206 v0.3 Added tags dump
# 191205 v0.2 Updated to xbrowsersync v1.5 and Python 3.x ONLY
# 181128 v0.1 New
#
# Issues, and changes pending
# None
#
#==========================================================
import argparse
import os
import json
import codecs
import sys
import io
import glob
# Configs / Constants
INDENT = 2
OFILESUFFIX = "_OUT"        # Appended to the input file name to form the default output file name
NONE_SEARCH = "__NONE__"    # Placeholder search term used while no search term has been entered

# Directory that is searched for the newest xbs_backup*.json when --jsonbackup is not given.
# The value must end with a path separator because the glob pattern is appended directly to it.
if sys.platform == "win32":
    JSON_PATH_DEFAULT = "Z:\\Downloads\\"
elif "linux" in sys.platform:           # <linux2> on Py2, <linux> on Py3
    JSON_PATH_DEFAULT = "/mnt/share/Downloads/"
else:
    # Fallback so the name is always defined (e.g. on macOS): the user's Downloads directory.
    JSON_PATH_DEFAULT = os.path.join(os.path.expanduser("~"), "Downloads", "")

# Compare version tuples rather than a float built from major/minor, which would
# mis-order two-digit minor versions (3.10 would become 4.0).
if sys.version_info < (3, 6):
    print ("Not supported on Python versions before 3.6.")
    sys.exit()

# Global items
url_dict = {}       # URL -> {"instance": [...]} filled by log_urls
folder_dict = {}    # folder title -> {"instance": [...]} filled by log_folder
tags_dict = {}      # tag -> list of {"title", "id", "url"} filled by digin's GatherTags operation
def main():
    """Load the json backup, then run one CLI action or the interactive menu.

    Reads the module-level ``infile`` (path of the backup file) and ``args``
    (parsed command line) that the ``__main__`` block sets up.

    CLI actions (--print, --html, --tags, SearchTerm) each do their work and
    then exit the process.  With none of them given, an endless menu loop
    offers list / delete operations on the loaded tree until the user quits.
    Changes are only persisted when the user picks the 'w' option.
    """
    global match_cnt
    global change_cnt
    global do_all
    global json_dict
    global url_dict
    global folder_dict
    global tags_dict

    with io.open(infile, encoding='utf8', errors="replace") as json_data:
        json_dict = json.load(json_data)
    bookmarks = json_dict["xbrowsersync"]["data"]["bookmarks"]
    term = NONE_SEARCH

    def walk(operation, search_term="", commit=False):
        """Run one digin pass over the whole bookmarks tree."""
        return digin(parent=bookmarks, parent_id="", path="/", search_term=search_term, operation=operation, commit=commit)

    # CLI mode
    if args.print:
        walk("printtree")
        sys.exit()

    if args.html is not None:
        from html import escape     # Local import: only this branch needs it

        walk("GatherTags")
        tags_count = 2
        if args.tags > 0:
            tags_count = args.tags
        listed_tags = [tag for tag in sorted(tags_dict) if len(tags_dict[tag]) >= tags_count]

        # Explicit utf8 so titles outside the locale's code page can be written; the
        # meta tag tells the browser which encoding was used.
        with io.open(args.html, 'wt', encoding='utf8') as html_fout:
            html_fout.write("""<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>xbs Tags</title>
</head>
<body>
<a name="top"></a>
<h1>Tagged bookmarks list</h1>
""")
            html_fout.write(f'(Tags used {tags_count} or more times)<br>\n')
            # Index of tags, each linking to its section further down
            for tag in listed_tags:
                safe_tag = escape(tag, quote=True)
                html_fout.write(f' <b><a href="#{safe_tag}">{safe_tag}</a></b><br>\n')
            html_fout.write(" <br>\n")
            # One section per tag.  Tags, titles and URLs come from the backup file,
            # so they are escaped before being placed into markup.
            for tag in listed_tags:
                safe_tag = escape(tag, quote=True)
                html_fout.write(f' <a name="{safe_tag}"></a><H3>{safe_tag} ({len(tags_dict[tag])}):</H3>\n')
                html_fout.write(' <BLOCKQUOTE>\n')
                for bookmark in tags_dict[tag]:
                    html_fout.write (' <A HREF="{}" target="_blank">{}</A><BR>\n'.format(
                        escape(str(bookmark['url']), quote=True),
                        escape(str(bookmark['title']))))
                html_fout.write(' <a href="#top">(top)</a>\n')
                html_fout.write(' </BLOCKQUOTE>\n')
            html_fout.write(" </body> </html>")
        sys.exit()

    if args.tags > -1:
        walk("GatherTags")
        for tag in sorted(tags_dict):
            if args.tags == 0:
                # --tags 0 is the special case: only tags used exactly once
                if len(tags_dict[tag]) == 1:
                    print ("[{}] (1):".format(tag))
                    if args.names:
                        print (" {:<4} - {}".format(tags_dict[tag][0]["id"], tags_dict[tag][0]["title"]))
                        print()
            else:
                if len(tags_dict[tag]) >= args.tags:
                    print ("[{}] ({}):".format(tag, len(tags_dict[tag])))
                    if args.names:
                        for bookmark in tags_dict[tag]:
                            print (" {:<4} - {}".format(bookmark["id"], bookmark["title"]))
                        print()
        sys.exit()

    if args.SearchTerm is not None:
        _search_term = args.SearchTerm.lower()
        print (f"***** Bookmarks tagged with <{_search_term}>:")
        walk("GatherTags")
        for tag in sorted(tags_dict):
            if _search_term in tag.lower():
                print (f" [{tag}] ({len(tags_dict[tag])}):")
                for bookmark in tags_dict[tag]:
                    print (f" {bookmark['title']}")
                print()
        if args.bookmarks:
            match_cnt = 0
            print (f"\n***** Bookmarks containing or tagged with <{_search_term}>:")
            walk("search2", search_term=_search_term)
        sys.exit()

    if args.bookmarks:                  # and not args.SearchTerm
        print ("A SearchTerm is required with use of the --bookmarks switch.")
        sys.exit()

    # Interactive mode
    while True:
        # Counters and the 'a'll flag are per menu action
        match_cnt = 0
        change_cnt = 0
        do_all = False
        print ("""
---------------------------------------------------------------------
Options:
p: Print the bookmarks tree hierarchy
s: Search term entry (currently <{}>)
t: List bookmark search matches
T: Delete bookmark search matches
g: List folder names search matches
G: Delete folder names search matches
d: List duplicate URLs
D: Delete duplicate URLs
f: List duplicate folders
F: Delete duplicate folders
x: List tags on folders (only leaf nodes should have tags)
X: Delete tags on folders
y: List empty folders (may arise if all children were deleted)
Y: Delete empty folders
w: Write out the bookmarks data to a file
q: Quit/exit (Do a Write first!)
""".format(term))
        select = prompt("Enter option: ", valid="pstTgGdDfFxXyYwq")

        if select == 'p':
            walk("printtree")

        elif select == 's':
            term = prompt_long("Search for (empty to clear search term): ").lower()
            if term == "":
                term = NONE_SEARCH

        elif select == 't':
            walk("search", search_term=term)
            print ("\nMatches: {}\n".format(match_cnt))

        elif select == 'T':
            walk("search", search_term=term, commit=True)
            delete_items ()             # Do the queued deletes
            print ("\nMatches: {} Deletes: {}\n".format(match_cnt, change_cnt))

        elif select == 'g':
            walk("SearchFolders", search_term=term)
            print ("\nMatches: {}\n".format(match_cnt))

        elif select == 'G':
            walk("SearchFolders", search_term=term, commit=True)
            delete_items ()             # Do the queued deletes
            print ("\nMatches: {} Deletes: {}\n".format(match_cnt, change_cnt))

        elif select == 'd':
            walk("DupURLs")
            dup_urls()
            url_dict = {}               # Start from scratch on the next duplicate scan
            print ("\nMatches: {}\n".format(match_cnt))

        elif select == 'D':
            walk("DupURLs")
            dup_urls(commit=True)
            url_dict = {}
            delete_items ()
            print ("\nMatches: {} Deletes: {}\n".format(match_cnt, change_cnt))

        elif select == 'f':
            walk("DupFolders")
            dup_folders()
            folder_dict = {}            # Start from scratch on the next duplicate scan
            print ("\nMatches: {}\n".format(match_cnt))

        elif select == 'F':
            walk("DupFolders")
            dup_folders(commit=True)
            folder_dict = {}
            delete_items ()
            print ("\nMatches: {} Deletes: {}\n".format(match_cnt, change_cnt))

        elif select == 'x':
            walk("FolderTags")
            print ("\nMatches: {}\n".format(match_cnt))

        elif select == 'X':
            # Folder tags are removed immediately by digin, so no delete_items call here
            walk("FolderTags", commit=True)
            print ("\nMatches: {} Deletes: {}\n".format(match_cnt, change_cnt))

        elif select == 'y':
            walk("EmptyFolders")
            print ("\nMatches: {}\n".format(match_cnt))

        elif select == 'Y':
            walk("EmptyFolders", commit=True)
            delete_items ()
            print ("\nMatches: {} Deletes: {}\n".format(match_cnt, change_cnt))

        elif select == 'w':
            ans = prompt_long("Output file name (default <{}>: ".format(infile + OFILESUFFIX))
            if ans == "":
                ans = infile + OFILESUFFIX
            with io.open(ans, "w", encoding='utf8') as ofile:
                json.dump(json_dict, ofile, ensure_ascii=False, indent=INDENT)

        elif select == 'q':
            if prompt("Quit now? (Remember to write any changes first!) ", valid="yn") == 'y':
                sys.exit()

        else:
            print ("Shouldn't have gotten here!")
            sys.exit()
def _confirm_delete(prompt_text):
    """Return the user's answer to a delete confirmation, honoring an earlier 'a'll.

    Once the global do_all flag is set the answer is 'y' without prompting.
    An 'a' answer sets do_all and is reported as 'y'.
    """
    global do_all
    if do_all:
        return 'y'
    ans = prompt(prompt_text, valid="yaqn\r")
    if ans == 'a':
        do_all = True
        ans = 'y'
    return ans


def _offer_discard(prompt_text):
    """On quit, ask whether the deletes queued so far should be thrown away."""
    global path_dict
    # "\r" is what the raw-mode key reader delivers for Enter, i.e. the default 'n'o
    if prompt(prompt_text, valid="yn\r") == 'y':
        path_dict = {}


def digin(parent, parent_id, path, search_term, operation, commit=False, indent=""):
    """Recurse through the json dictionary bookmark tree, with operation options.

    parent       List of item dictionaries at the current level.
    parent_id    Id (as a string) of the folder owning ``parent``; "" at the root.
    path         Text path of the folder owning ``parent``.
    search_term  Lower-case text to look for (search, search2, SearchFolders).
    operation    One of "printtree", "SearchFolders", "FolderTags", "EmptyFolders",
                 "DupFolders", "DupURLs", "search", "search2", "GatherTags".
    commit       When True, matches are offered for deletion.  Item deletions are only
                 queued through collect_items; folder tags are removed immediately.
    indent       Prefix used by printtree / SearchFolders to show nesting depth.

    Returns -1 when the user answered 'q'uit (propagated up through the recursion),
    otherwise None.  Updates the globals match_cnt and change_cnt.
    """
    global match_cnt
    global change_cnt

    for item_index, item in enumerate(parent):
        if "children" in item:      # ***** Its a folder *****
            if operation == "printtree":
                print ("{:<4} > {}{}".format(item["id"], indent, item["title"]))

            if operation == "SearchFolders" and search_term in item["title"].lower():
                print ("{:<4} > {}{}".format(item["id"], indent, item["title"]))
                match_cnt += 1
                if commit:
                    ans = _confirm_delete("Confirm delete for this item ('y'es, 'a'll, or 'q'uit, default 'n'o) ")
                    if ans == 'y':
                        collect_items (path + parent_id, parent, item_index)
                        change_cnt += 1
                    if ans == 'q':
                        _offer_discard("Discard pending deletes ('y'es, default 'n'o)? ")
                        return -1

            if operation == "FolderTags" and "tags" in item:
                print ("\n{:4} - {} >>> {}\n {}".format(
                    item["id"],
                    path[3:],
                    item["title"],
                    item["tags"]))
                if "url" in item:
                    if item["url"] is None:
                        print (" url = None !!!")
                    else:
                        print (" url: <{}>".format(item["url"]))
                match_cnt += 1
                if commit:
                    ans = _confirm_delete("Confirm immediate delete of tags on this folder ('y'es, 'a'll, or 'q'uit, default 'n'o) ")
                    if ans == 'y':
                        del item["tags"]        # Applied right away, nothing is queued
                        change_cnt += 1
                    if ans == 'q':
                        return -1

            if operation == "EmptyFolders" and len(item["children"]) == 0:
                print ("\n{:4} - {} >>> {}".format(
                    item["id"],
                    path,
                    item["title"]))
                match_cnt += 1
                if commit:
                    ans = _confirm_delete("Confirm delete of this empty folder ('y'es, 'a'll, or 'q'uit, default 'n'o) ")
                    if ans == 'y':
                        collect_items (path + parent_id, parent, item_index)
                        change_cnt += 1
                    if ans == 'q':
                        _offer_discard("Discard pending deletes ('y'es, default 'n'o) ")
                        return -1

            if operation == "DupFolders":
                log_folder (item["title"], path, item["id"], parent, parent_id, item_index)

            if digin (                  # Recurse down to the next level
                    parent = item["children"],
                    parent_id = str(item["id"]),
                    path = path + " > " + item["title"],
                    search_term = search_term,
                    operation = operation,
                    commit = commit,
                    indent = indent + " ") == -1:
                return -1               # recursion 'q'uit switch

        else:                       # ***** Its a Leaf node *****
            item_tags = item.get("tags") or []      # Missing or None tags are treated as no tags

            if operation == "printtree":
                print ("{:<4} - {}{} [{}]".format(item["id"], indent, item["title"], ", ".join(item_tags)))

            if operation == "DupURLs":
                log_urls (item["url"], path, item["id"], parent, parent_id, item_index)

            if operation == "search" or operation == "search2":
                url_text = item.get("url") or ""    # Tolerate a missing or None url
                match = (search_term in url_text.lower()
                         or search_term in item["title"].lower()
                         # search_term is lower case, so tags must be lowered too
                         or any(search_term in tag.lower() for tag in item_tags))
                if match:
                    match_cnt += 1
                    if operation == "search":
                        print ("\n{:4} - {} >>> {}\n url: <{}>".format(
                            item["id"],
                            path[3:],
                            item["title"],
                            item.get("url")))
                        if "tags" in item:
                            if item["tags"] is None:
                                print (" tags = None !!!")
                            else:
                                print (" tags: <{}>".format(item["tags"]))
                    if operation == "search2":
                        print (f" {item['title']}")
                    if commit:
                        ans = _confirm_delete("Confirm delete for this item ('y'es, 'a'll, or 'q'uit, default 'n'o) ")
                        if ans == 'y':
                            collect_items (path + parent_id, parent, item_index)
                            change_cnt += 1
                        if ans == 'q':
                            _offer_discard("Discard pending deletes ('y'es, default 'n'o) ")
                            return -1

            if operation == "GatherTags":
                for tag in item_tags:
                    tags_dict.setdefault(tag, []).append(
                        {"title":item["title"], "id":item["id"], "url":item.get("url")})
# Deletes queued by collect_items and carried out by delete_items.
path_dict = {}


def collect_items (path, parent, index):
    """Queue the entry at ``index`` of the list ``parent`` for removal by delete_items.

    Example arguments:
        path    "/ > [xbs] Toolbar > Chris' > Blogs166"
        parent  List of dictionaries of bookmarks [{}, {}, {}]
        index   Index within the list - 2
    """
    position = int(index)
    queued = path_dict.get(path)
    if queued is None:
        path_dict[path] = {"parent": parent, "index": [position]}
        return
    # A repeat of the same path could happen for two identical bookmarks in the
    # same folder, but normally they would have different ID#s
    queued["index"].append(position)
def delete_items ():
    """Carry out the deletes queued by collect_items, then empty the queue.

    Within each list the entries are removed from the highest index down to the
    lowest.  Removing a lower index first would shift the later entries and make
    the remaining queued index numbers point at the wrong items.
    """
    global path_dict
    for queued in path_dict.values():
        siblings = queued["parent"]
        positions = queued["index"]
        positions.sort(reverse=True)
        for position in positions:
            del siblings[position]
    path_dict = {}
def log_folder (foldername, path, _id, parent, parent_id, item_index):
    """Record one occurrence of a folder name in folder_dict.

    Every occurrence is appended to folder_dict[foldername]["instance"] together
    with the list that holds it and its position in that list.
    """
    occurrence = {
        "id": _id,
        "path": path + " [" + parent_id + "]",
        "parent": parent,
        "item_index": item_index,
        "parent_id": parent_id,
    }
    bucket = folder_dict.setdefault(foldername, {"instance": []})
    bucket["instance"].append(occurrence)
def dup_folders (commit=False):
    """List out, and optionally delete, duplicate folders.

    Works on folder_dict as filled by log_folder.  Every folder name with more
    than one occurrence is listed, and each listed occurrence adds to match_cnt.

    With commit=True the user is asked about each occurrence; confirmed ones are
    queued through collect_items (change_cnt is incremented) for a later
    delete_items call.  Once 'a'll has been answered the lowest-id occurrence of
    every folder name is kept and the rest are queued without asking.

    Returns -1 when the user quits, otherwise None.
    """
    global match_cnt
    global change_cnt
    global path_dict

    yes_all = False
    for folder, entry in folder_dict.items():
        # Sort id numbers so that lowest is kept for 'a'll mode
        instances = sorted(entry["instance"], key=lambda k: k['id'])
        if len(instances) < 2:
            continue

        print ("-------------------------------------------------\n{}".format(folder))
        for instance in instances:
            print (" {:>4} - {}".format(instance["id"], instance["path"]))
            match_cnt += 1

        if not commit:
            continue

        print ("")
        for position, instance in enumerate(instances):
            if yes_all:
                if position == 0:
                    continue            # Always keep the first instance of duplicate folders
                ans = 'y'
            else:
                print (" {:>4} - {}".format(instance["id"], instance["path"]))
                ans = prompt ("Confirm delete for this item ('y'es, 'q'uit, 's'kip to next folder, 'a'll - default 'n'o) ", valid="yqsan\r")
                if ans == 'a':
                    yes_all = True
                    if position == 0:
                        # 'a'll given on the lowest id: that one is the copy to keep,
                        # otherwise every copy of this folder would be deleted.
                        continue
                    ans = 'y'

            if ans == 'y':
                collect_items (instance["path"], instance["parent"], instance["item_index"])
                change_cnt += 1
            elif ans == 's':
                break
            elif ans == 'q':
                if prompt("Discard pending deletes ('y'es, default 'n'o) ", valid="yn\r") == 'y':
                    path_dict = {}
                return -1
def log_urls (url, path, _id, parent, parent_id, item_index):
    """Record one occurrence of a URL in url_dict.

    Every occurrence is appended to url_dict[url]["instance"] together with the
    list that holds the bookmark and the bookmark's position in that list.
    """
    occurrence = {
        "id": _id,
        "path": path + " [" + parent_id + "]",
        "parent": parent,
        "item_index": item_index,
        "parent_id": parent_id,
    }
    bucket = url_dict.setdefault(url, {"instance": []})
    bucket["instance"].append(occurrence)
def dup_urls (commit=False):
    """List out, and optionally delete, bookmarks with the same URL.

    Works on url_dict as filled by log_urls.  Every URL with more than one
    occurrence is listed, and each listed occurrence adds to match_cnt.

    With commit=True the user is asked about each occurrence; confirmed ones are
    queued through collect_items (change_cnt is incremented) for a later
    delete_items call.  Once 'a'll has been answered the lowest-id occurrence of
    every URL is kept and the rest are queued without asking.

    Returns -1 when the user quits, otherwise None.
    """
    global match_cnt
    global change_cnt
    global path_dict

    yes_all = False
    for url, entry in url_dict.items():
        # Sort id numbers so that lowest is kept for 'a'll mode
        instances = sorted(entry["instance"], key=lambda k: k['id'])
        if len(instances) < 2:
            continue

        print ("-------------------------------------------------\n{}".format(url))
        for instance in instances:
            print (" {:>4} - {}".format(instance["id"], instance["path"]))
            match_cnt += 1

        if not commit:
            continue

        print ("")
        for position, instance in enumerate(instances):
            if yes_all:
                if position == 0:
                    continue            # Always keep the first instance of duplicate bookmarks
                ans = 'y'
            else:
                print (" {:>4} - {}".format(instance["id"], instance["path"]))
                ans = prompt ("Confirm delete for this item ('y'es, 'q'uit, 's'kip to next URL, 'a'll - default 'n'o) ", valid="yqsan\r")
                if ans == 'a':
                    yes_all = True
                    if position == 0:
                        # 'a'll given on the lowest id: that one is the copy to keep,
                        # otherwise every bookmark with this URL would be deleted.
                        continue
                    ans = 'y'

            if ans == 'y':
                collect_items (instance["path"], instance["parent"], instance["item_index"])
                change_cnt += 1
            elif ans == 's':
                break
            elif ans == 'q':
                if prompt("Discard pending deletes ('y'es, default 'n'o) ", valid="yn\r") == 'y':
                    path_dict = {}
                return -1
def prompt_long (prompt_text):
    """Show ``prompt_text`` and return the line the user types (without the newline)."""
    reply = input(prompt_text)
    return reply
def prompt (prompt_text, valid=None):
    """Show ``prompt_text``, read a single key with getch and return it.

    valid   String of accepted keys ("\\r" stands for Enter), or None to accept
            any key.  Keys not in ``valid`` are reported and the read is repeated.

    The accepted key is echoed followed by a newline.

    Raises KeyboardInterrupt when Ctrl-C is read as a key, and EOFError when the
    input has ended.
    """
    print(prompt_text, end='', flush=True)
    expected = None if valid is None else ", ".join(valid).replace("\r", "enter")
    while True:
        select = getch()
        if select == "\x03":
            # Read as an ordinary key, Ctrl-C would otherwise only be reported as
            # invalid input and the user could not interrupt the program.
            print()
            raise KeyboardInterrupt
        if select == "":
            # End of input.  '' would also pass the "in valid" substring test below.
            raise EOFError("End of input while waiting for a key")
        if valid is None or select in valid:
            break
        print("\nInvalid input - expecting one of <{}>: ".format(expected), end='', flush=True)
    print(select)
    return select
class _Getch:
    """Callable that reads a single character from standard input without echo.

    The Windows reader is tried first; if constructing it raises ImportError
    the Unix reader is used instead.
    """

    def __init__(self):
        try:
            reader = _GetchWindows()
        except ImportError:
            reader = _GetchUnix()
        self.impl = reader

    def __call__(self):
        return self.impl()
class _GetchUnix:
    """Single-character reader built on the tty and termios modules."""

    def __init__(self):
        # Imported here only so that construction fails where these modules are missing
        import tty, sys

    def __call__(self):
        import sys
        import termios
        import tty

        stdin_fd = sys.stdin.fileno()
        saved_attrs = termios.tcgetattr(stdin_fd)
        try:
            # Raw mode delivers the key without waiting for Enter
            tty.setraw(sys.stdin.fileno())
            key = sys.stdin.read(1)
        finally:
            # Always put the terminal back the way it was found
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, saved_attrs)
        return key
class _GetchWindows:
    """Single-character reader built on msvcrt.

    Returns the key as a one-character string.  Special keys (arrows, function
    keys, ...) are reported as "\\x00".
    """

    def __init__(self):
        # Raises ImportError where msvcrt does not exist; _Getch relies on that
        # to fall back to the Unix reader.
        import msvcrt

    def __call__(self):
        import msvcrt
        key = msvcrt.getch()
        if key in (b"\x00", b"\xe0") and msvcrt.kbhit():
            # Special keys arrive as a prefix byte followed by a keycode.  Drop the
            # keycode so it is not read as the next key.  kbhit() guards against
            # blocking when the prefix byte was really a typed character.
            msvcrt.getch()
            return "\x00"
        # errors="replace": a byte that is not valid UTF-8 must not raise here
        return key.decode("utf-8", errors="replace")
# Single shared key reader; prompt() calls getch() to read one key without echo.
getch = _Getch()
# def get_item (path):
# """Lookup leaf node data given hierarchical path info. NOT USED, BUT POTENTIALLY INTERESTING.
# Example passed-in path list structure:
# [ "xbrowsersync", "data", "bookmarks", 2, "ATV", 7, "Honda parts", 4 ]
# A named item translates to "children" in the json_dict hierarchy.
# Corresponds to this path in json_dict:
# json_dict["xbrowsersync"]["data"]["bookmarks"][2]["children"][7]["children"][4]
# Returns dictionary:
# {"title" : "<title text>",
# "url" : "<url text>",
# "id" : id (int)
# "tags" : [ "<tags>", "<list"> ],
# "text_path: "<string concat of titles and list indexes>"
# }
# """
# global json_dict
# JOINTEXT = " > "
# xxx = json_dict["xbrowsersync"]["data"]["bookmarks"]
# text_full_path = ""
# for item in path:
# print (item)
# if type(item) is str: # Its the title of a folder (child) level
# xxx = xxx["children"]
# else: # Its a list index (integer)
# text_full_path += JOINTEXT + xxx[item]["title"]
# xxx = xxx[item]
# tags = ""
# if "tags" in xxx:
# tags = xxx["tags"]
# return {
# "title" : xxx["title"],
# "url" : xxx["url"],
# "id" : xxx["id"],
# "tags" : tags,
# "text_full_path": text_full_path
# }
def _newest_backup(directory):
    """Return the path of the newest xbs_backup*.json in ``directory``, or None.

    ``directory`` must end with a path separator because the file pattern is
    appended to it directly.  "Newest" is decided by os.path.getctime.
    """
    candidates = glob.glob(f'{directory}xbs_backup*.json')
    if not candidates:
        return None
    return max(candidates, key=os.path.getctime)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('SearchTerm', nargs='?',    # optional positional argument
                        help="Print bookmarks tagged with this text (CLI mode).")
    parser.add_argument('--bookmarks', '-b', action='store_true',
                        help="Print bookmarks containing or tagged with the SearchTerm (CLI mode).")
    parser.add_argument('--print', '-p', action='store_true',
                        help="Print complete bookmark hierarchy (redirect to less or a file) (CLI mode).")
    parser.add_argument('--tags', '-t', type=int, default=-1,
                        help="Print tags list filtered by tags with a minimum <n> number of uses. n=0 prints only single use tags. (CLI mode)")
    parser.add_argument('--names', '-n', action='store_true',
                        help="Used with --tags to enable printing the titles/names for each tag. (CLI mode)")
    parser.add_argument('--html', type=str,
                        help="Path of html output file.")
    parser.add_argument('--jsonbackup', '-j', type=str,
                        help=f"Path to json backup file. Default is newest version at <{JSON_PATH_DEFAULT}xbs_backup*.json>.")
    parser.add_argument('-V', '--version',
                        help="Return version number and exit.",
                        action='version',
                        version='%(prog)s ' + __version__)
    args = parser.parse_args()

    # main() reads the module-level names args and infile
    infile = args.jsonbackup
    if infile is None:
        infile = _newest_backup(JSON_PATH_DEFAULT)
        if infile is None:
            # No backup in the default directory: report it instead of failing on an empty list
            print ("Can't find any <{}xbs_backup*.json> file. Use --jsonbackup to specify one.".format(JSON_PATH_DEFAULT))
            sys.exit(1)

    if not os.path.exists(infile):
        print ("Can't find the input file <{}>".format(infile))
        sys.exit(1)

    main()