Trying to make the data handling more robust. Adding UUID identifiers and related changes.
This commit is contained in:
parent
3d35990b3e
commit
4dfc81bd44
@ -43,29 +43,33 @@ class DatabaseManager:
|
|||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS headings (
|
CREATE TABLE IF NOT EXISTS headings (
|
||||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
uuid TEXT NOT NULL UNIQUE,
|
||||||
level INTEGER NOT NULL,
|
level INTEGER NOT NULL,
|
||||||
title TEXT NOT NULL,
|
title TEXT NOT NULL,
|
||||||
parent_id INTEGER,
|
parent_uuid TEXT,
|
||||||
document_id INTEGER NOT NULL,
|
document_id INTEGER NOT NULL,
|
||||||
|
path TEXT NOT NULL,
|
||||||
|
headings_order INTEGER,
|
||||||
added_timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
|
added_timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||||
updated_timestamp DATETIME,
|
updated_timestamp DATETIME,
|
||||||
deleted_timestamp DATETIME,
|
deleted_timestamp DATETIME,
|
||||||
isDeleted BOOLEAN DEFAULT 0,
|
isDeleted BOOLEAN DEFAULT 0,
|
||||||
FOREIGN KEY (parent_id) REFERENCES headings(id),
|
FOREIGN KEY (parent_uuid) REFERENCES headings(uuid),
|
||||||
FOREIGN KEY (document_id) REFERENCES documents(id)
|
FOREIGN KEY (document_id) REFERENCES documents(id)
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS body (
|
CREATE TABLE IF NOT EXISTS body (
|
||||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
uuid TEXT NOT NULL UNIQUE,
|
||||||
content TEXT,
|
content TEXT,
|
||||||
heading_id INTEGER NOT NULL,
|
heading_uuid TEXT NOT NULL,
|
||||||
document_id INTEGER NOT NULL,
|
document_id INTEGER NOT NULL,
|
||||||
md5sum TEXT,
|
md5sum TEXT,
|
||||||
added_timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
|
added_timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||||
updated_timestamp DATETIME,
|
updated_timestamp DATETIME,
|
||||||
deleted_timestamp DATETIME,
|
deleted_timestamp DATETIME,
|
||||||
isDeleted BOOLEAN DEFAULT 0,
|
isDeleted BOOLEAN DEFAULT 0,
|
||||||
FOREIGN KEY (heading_id) REFERENCES headings(id),
|
FOREIGN KEY (heading_uuid) REFERENCES headings(uuid),
|
||||||
FOREIGN KEY (document_id) REFERENCES documents(id)
|
FOREIGN KEY (document_id) REFERENCES documents(id)
|
||||||
);
|
);
|
||||||
''')
|
''')
|
||||||
@ -83,12 +87,6 @@ class DocumentManager:
|
|||||||
def document_exists(self, document_name: str) -> Optional[Tuple[int]]:
|
def document_exists(self, document_name: str) -> Optional[Tuple[int]]:
|
||||||
"""
|
"""
|
||||||
Check if a document exists in the database.
|
Check if a document exists in the database.
|
||||||
|
|
||||||
Args:
|
|
||||||
document_name: Name of the document to check.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Document ID if it exists, None otherwise.
|
|
||||||
"""
|
"""
|
||||||
self.db_manager.cursor.execute('SELECT id FROM documents WHERE name = ?', (document_name,))
|
self.db_manager.cursor.execute('SELECT id FROM documents WHERE name = ?', (document_name,))
|
||||||
return self.db_manager.cursor.fetchone()
|
return self.db_manager.cursor.fetchone()
|
||||||
@ -119,7 +117,7 @@ class DocumentManager:
|
|||||||
|
|
||||||
def soft_delete_document(self, document_id: int) -> None:
|
def soft_delete_document(self, document_id: int) -> None:
|
||||||
"""Soft delete a document by marking it as deleted in the database."""
|
"""Soft delete a document by marking it as deleted in the database."""
|
||||||
logging.debug(f"** This now soft deleted, document_id: {document_id}")
|
logging.debug(f"** This document is now soft deleted, document_id: {document_id}")
|
||||||
now: str = datetime.now().isoformat()
|
now: str = datetime.now().isoformat()
|
||||||
self.db_manager.cursor.execute('''
|
self.db_manager.cursor.execute('''
|
||||||
UPDATE documents SET isDeleted = 1, deleted_timestamp = ? WHERE id = ?
|
UPDATE documents SET isDeleted = 1, deleted_timestamp = ? WHERE id = ?
|
||||||
@ -146,6 +144,7 @@ class MarkdownProcessor:
|
|||||||
md = MarkdownIt()
|
md = MarkdownIt()
|
||||||
tokens = md.parse(markdown_text)
|
tokens = md.parse(markdown_text)
|
||||||
|
|
||||||
|
print('### Calling update_document_content')
|
||||||
self.update_document_content(tokens, document_id)
|
self.update_document_content(tokens, document_id)
|
||||||
|
|
||||||
def read_markdown_file(self, file_path: str) -> str:
|
def read_markdown_file(self, file_path: str) -> str:
|
||||||
@ -154,39 +153,42 @@ class MarkdownProcessor:
|
|||||||
|
|
||||||
def update_document_content(self, tokens: List, document_id: int) -> None:
|
def update_document_content(self, tokens: List, document_id: int) -> None:
|
||||||
existing_structure = self.get_existing_document_structure(document_id)
|
existing_structure = self.get_existing_document_structure(document_id)
|
||||||
new_structure = self.parse_new_structure(tokens)
|
new_structure = self.parse_new_structure(tokens, document_id)
|
||||||
|
|
||||||
|
print('### Calling merg_structures...')
|
||||||
self.merge_structures(existing_structure, new_structure, document_id)
|
self.merge_structures(existing_structure, new_structure, document_id)
|
||||||
|
|
||||||
def get_existing_document_structure(self, document_id: int) -> Dict:
|
def get_existing_document_structure(self, document_id: int) -> Dict:
|
||||||
structure = {}
|
structure = {}
|
||||||
self.db_manager.cursor.execute('''
|
self.db_manager.cursor.execute('''
|
||||||
SELECT h.id, h.level, h.title, h.parent_id, b.content
|
SELECT h.uuid, h.level, h.title, h.parent_uuid, h.path, b.content, b.uuid
|
||||||
FROM headings h
|
FROM headings h
|
||||||
LEFT JOIN body b ON h.id = b.heading_id
|
LEFT JOIN body b ON h.uuid = b.heading_uuid
|
||||||
WHERE h.document_id = ? AND h.isDeleted = 0
|
WHERE h.document_id = ? AND h.isDeleted = 0
|
||||||
ORDER BY h.level, h.id
|
ORDER BY h.level, h.id
|
||||||
''', (document_id,))
|
''', (document_id,))
|
||||||
for heading_id, level, title, parent_id, content in self.db_manager.cursor.fetchall():
|
for heading_uuid, level, title, parent_uuid, path, content, body_uuid in self.db_manager.cursor.fetchall():
|
||||||
structure[heading_id] = {
|
structure[heading_uuid] = {
|
||||||
'id': heading_id, # Add this line to include the id in each node
|
'uuid': heading_uuid,
|
||||||
'level': level,
|
'level': level,
|
||||||
'title': title,
|
'title': title,
|
||||||
'parent_id': parent_id,
|
'parent_uuid': parent_uuid,
|
||||||
|
'path': path,
|
||||||
'content': content,
|
'content': content,
|
||||||
|
'body_uuid': body_uuid,
|
||||||
'children': []
|
'children': []
|
||||||
}
|
}
|
||||||
# Build the tree structure
|
# Build the tree structure
|
||||||
for id, node in structure.items():
|
for uuid, node in structure.items():
|
||||||
if node['parent_id'] in structure:
|
if node['parent_uuid'] in structure:
|
||||||
structure[node['parent_id']]['children'].append(id)
|
structure[node['parent_uuid']]['children'].append(uuid)
|
||||||
return structure
|
return structure
|
||||||
|
|
||||||
def parse_new_structure(self, tokens: List) -> Dict:
|
def parse_new_structure(self, tokens: List, document_id: int) -> Dict:
|
||||||
structure = {}
|
structure = {}
|
||||||
current_heading = None
|
current_heading = None
|
||||||
current_content = []
|
current_content = []
|
||||||
parent_stack = [{"id": None, "level": 0}]
|
parent_stack = [{"uuid": None, "level": 0, "path": ""}]
|
||||||
|
|
||||||
for token in tokens:
|
for token in tokens:
|
||||||
if token.type == 'heading_open':
|
if token.type == 'heading_open':
|
||||||
@ -195,15 +197,18 @@ class MarkdownProcessor:
|
|||||||
level = int(token.tag.strip('h'))
|
level = int(token.tag.strip('h'))
|
||||||
while parent_stack[-1]['level'] >= level:
|
while parent_stack[-1]['level'] >= level:
|
||||||
parent_stack.pop()
|
parent_stack.pop()
|
||||||
current_heading = str(uuid.uuid4()) # Generate a temporary ID
|
current_heading = str(uuid.uuid4())
|
||||||
|
parent_path = parent_stack[-1]['path']
|
||||||
structure[current_heading] = {
|
structure[current_heading] = {
|
||||||
|
'uuid': current_heading,
|
||||||
'level': level,
|
'level': level,
|
||||||
'title': '',
|
'title': '',
|
||||||
'parent_id': parent_stack[-1]['id'],
|
'parent_uuid': parent_stack[-1]['uuid'],
|
||||||
|
'path': f"{parent_path}/{current_heading}" if parent_path else current_heading,
|
||||||
'content': '',
|
'content': '',
|
||||||
'children': []
|
'children': []
|
||||||
}
|
}
|
||||||
parent_stack.append({"id": current_heading, "level": level})
|
parent_stack.append({"uuid": current_heading, "level": level, "path": structure[current_heading]['path']})
|
||||||
current_content = []
|
current_content = []
|
||||||
elif token.type == 'heading_close':
|
elif token.type == 'heading_close':
|
||||||
structure[current_heading]['content'] = ''.join(current_content).strip()
|
structure[current_heading]['content'] = ''.join(current_content).strip()
|
||||||
@ -221,17 +226,17 @@ class MarkdownProcessor:
|
|||||||
return structure
|
return structure
|
||||||
|
|
||||||
def merge_structures(self, existing: Dict, new: Dict, document_id: int) -> None:
|
def merge_structures(self, existing: Dict, new: Dict, document_id: int) -> None:
|
||||||
def merge_recursive(existing_node, new_node, parent_id):
|
def merge_recursive(existing_node, new_node, parent_uuid):
|
||||||
if not existing_node:
|
if not existing_node:
|
||||||
# This is a new node, insert it
|
# This is a new node, insert it
|
||||||
heading_id = self.insert_heading(new_node['level'], new_node['title'], parent_id, document_id)
|
heading_uuid = self.insert_heading(new_node['level'], new_node['title'], parent_uuid, document_id, new_node['path'])
|
||||||
self.insert_body(new_node['content'], heading_id, document_id)
|
body_uuid = self.insert_body(new_node['content'], heading_uuid, document_id)
|
||||||
for child in new_node['children']:
|
for child in new_node['children']:
|
||||||
merge_recursive(None, new[child], heading_id)
|
merge_recursive(None, new[child], heading_uuid)
|
||||||
else:
|
else:
|
||||||
# Update existing node
|
# Update existing node
|
||||||
self.update_heading(existing_node['id'], new_node['title'], new_node['level'], parent_id)
|
self.update_heading(existing_node['uuid'], new_node['title'], new_node['level'], parent_uuid, new_node['path'])
|
||||||
self.update_body(existing_node['id'], new_node['content'], document_id)
|
self.update_body(existing_node['body_uuid'], new_node['content'], document_id)
|
||||||
|
|
||||||
# Process children
|
# Process children
|
||||||
existing_children = {child['title']: child for child in existing_node['children']}
|
existing_children = {child['title']: child for child in existing_node['children']}
|
||||||
@ -239,89 +244,92 @@ class MarkdownProcessor:
|
|||||||
|
|
||||||
for title, child in new_children.items():
|
for title, child in new_children.items():
|
||||||
if title in existing_children:
|
if title in existing_children:
|
||||||
merge_recursive(existing_children[title], child, existing_node['id'])
|
merge_recursive(existing_children[title], child, existing_node['uuid'])
|
||||||
else:
|
else:
|
||||||
merge_recursive(None, child, existing_node['id'])
|
merge_recursive(None, child, existing_node['uuid'])
|
||||||
|
|
||||||
for title, child in existing_children.items():
|
for title, child in existing_children.items():
|
||||||
if title not in new_children:
|
if title not in new_children:
|
||||||
self.soft_delete_heading(child['id'])
|
self.soft_delete_heading(child['uuid'])
|
||||||
|
|
||||||
for new_root in new.values():
|
for new_root in new.values():
|
||||||
existing_root = next((node for node in existing.values() if node['title'] == new_root['title']), None)
|
existing_root = next((node for node in existing.values() if node['path'] == new_root['path']), None)
|
||||||
merge_recursive(existing_root, new_root, None)
|
merge_recursive(existing_root, new_root, None)
|
||||||
|
|
||||||
def insert_heading(self, level: int, title: str, parent_id: Optional[int], document_id: int) -> int:
|
def insert_heading(self, level: int, title: str, parent_uuid: Optional[str], document_id: int, path: str) -> str:
|
||||||
|
heading_uuid = str(uuid.uuid4())
|
||||||
self.db_manager.cursor.execute('''
|
self.db_manager.cursor.execute('''
|
||||||
INSERT INTO headings (level, title, parent_id, document_id)
|
INSERT INTO headings (uuid, level, title, parent_uuid, document_id, path)
|
||||||
VALUES (?, ?, ?, ?)
|
VALUES (?, ?, ?, ?, ?, ?)
|
||||||
''', (level, title, parent_id, document_id))
|
''', (heading_uuid, level, title, parent_uuid, document_id, path))
|
||||||
return self.db_manager.cursor.lastrowid
|
return heading_uuid
|
||||||
|
|
||||||
def update_heading(self, heading_id: int, title: str, level: int, parent_id: Optional[int]) -> None:
|
def update_heading(self, heading_uuid: str, title: str, level: int, parent_uuid: Optional[str], path: str) -> None:
|
||||||
self.db_manager.cursor.execute('''
|
self.db_manager.cursor.execute('''
|
||||||
UPDATE headings
|
UPDATE headings
|
||||||
SET title = ?, level = ?, parent_id = ?, updated_timestamp = CURRENT_TIMESTAMP
|
SET title = ?, level = ?, parent_uuid = ?, path = ?, updated_timestamp = CURRENT_TIMESTAMP
|
||||||
WHERE id = ?
|
WHERE uuid = ?
|
||||||
''', (title, level, parent_id, heading_id))
|
''', (title, level, parent_uuid, path, heading_uuid))
|
||||||
|
|
||||||
def insert_body(self, content: str, heading_id: int, document_id: int) -> None:
|
def insert_body(self, content: str, heading_uuid: str, document_id: int) -> str:
|
||||||
|
body_uuid = str(uuid.uuid4())
|
||||||
md5sum = hashlib.md5(content.encode()).hexdigest()
|
md5sum = hashlib.md5(content.encode()).hexdigest()
|
||||||
self.db_manager.cursor.execute('''
|
self.db_manager.cursor.execute('''
|
||||||
INSERT INTO body (content, heading_id, document_id, md5sum)
|
INSERT INTO body (uuid, content, heading_uuid, document_id, md5sum)
|
||||||
VALUES (?, ?, ?, ?)
|
VALUES (?, ?, ?, ?, ?)
|
||||||
''', (content, heading_id, document_id, md5sum))
|
''', (body_uuid, content, heading_uuid, document_id, md5sum))
|
||||||
|
return body_uuid
|
||||||
|
|
||||||
def update_body(self, heading_id: int, content: str, document_id: int) -> None:
|
def update_body(self, body_uuid: str, content: str, document_id: int) -> None:
|
||||||
md5sum = hashlib.md5(content.encode()).hexdigest()
|
md5sum = hashlib.md5(content.encode()).hexdigest()
|
||||||
self.db_manager.cursor.execute('''
|
self.db_manager.cursor.execute('''
|
||||||
UPDATE body
|
UPDATE body
|
||||||
SET content = ?, md5sum = ?, updated_timestamp = CURRENT_TIMESTAMP
|
SET content = ?, md5sum = ?, updated_timestamp = CURRENT_TIMESTAMP
|
||||||
WHERE heading_id = ? AND document_id = ?
|
WHERE uuid = ? AND document_id = ?
|
||||||
''', (content, md5sum, heading_id, document_id))
|
''', (content, md5sum, body_uuid, document_id))
|
||||||
|
|
||||||
def soft_delete_heading(self, heading_id: int) -> None:
|
def soft_delete_heading(self, heading_uuid: str) -> None:
|
||||||
now = datetime.now().isoformat()
|
now = datetime.now().isoformat()
|
||||||
self.db_manager.cursor.execute('''
|
self.db_manager.cursor.execute('''
|
||||||
UPDATE headings
|
UPDATE headings
|
||||||
SET isDeleted = 1, deleted_timestamp = ?
|
SET isDeleted = 1, deleted_timestamp = ?
|
||||||
WHERE id = ?
|
WHERE uuid = ?
|
||||||
''', (now, heading_id))
|
''', (now, heading_uuid))
|
||||||
# Also soft delete associated body content
|
# Also soft delete associated body content
|
||||||
self.db_manager.cursor.execute('''
|
self.db_manager.cursor.execute('''
|
||||||
UPDATE body
|
UPDATE body
|
||||||
SET isDeleted = 1, deleted_timestamp = ?
|
SET isDeleted = 1, deleted_timestamp = ?
|
||||||
WHERE heading_id = ?
|
WHERE heading_uuid = ?
|
||||||
''', (now, heading_id))
|
''', (now, heading_uuid))
|
||||||
|
|
||||||
class TopicReader:
|
class TopicReader:
|
||||||
"""Reads and retrieves topics from the database."""
|
"""Reads and retrieves topics from the database."""
|
||||||
def __init__(self, db_manager: 'DatabaseManager'):
|
def __init__(self, db_manager: 'DatabaseManager'):
|
||||||
self.db_manager = db_manager
|
self.db_manager = db_manager
|
||||||
|
|
||||||
def fetch_headings(self) -> List[Tuple[int, str, int, Optional[int]]]:
|
def fetch_headings(self) -> List[Tuple[str, str, int, Optional[str]]]:
|
||||||
self.db_manager.cursor.execute('''
|
self.db_manager.cursor.execute('''
|
||||||
SELECT id, title, level, parent_id
|
SELECT uuid, title, level, parent_uuid
|
||||||
FROM headings
|
FROM headings
|
||||||
WHERE isDeleted = 0
|
WHERE isDeleted = 0
|
||||||
ORDER BY level, id
|
ORDER BY level, headings_order
|
||||||
''')
|
''')
|
||||||
return self.db_manager.cursor.fetchall()
|
return self.db_manager.cursor.fetchall()
|
||||||
|
|
||||||
def fetch_topic_chain(self, heading_id: int) -> List[Tuple[int, str, int]]:
|
def fetch_topic_chain(self, heading_uuid: str) -> List[Tuple[str, str, int]]:
|
||||||
chain = []
|
chain = []
|
||||||
current_id = heading_id
|
current_uuid = heading_uuid
|
||||||
|
|
||||||
while current_id is not None:
|
while current_uuid is not None:
|
||||||
self.db_manager.cursor.execute('''
|
self.db_manager.cursor.execute('''
|
||||||
SELECT id, title, level, parent_id
|
SELECT uuid, title, level, parent_uuid
|
||||||
FROM headings
|
FROM headings
|
||||||
WHERE id = ?
|
WHERE uuid = ?
|
||||||
''', (current_id,))
|
''', (current_uuid,))
|
||||||
result = self.db_manager.cursor.fetchone()
|
result = self.db_manager.cursor.fetchone()
|
||||||
if result:
|
if result:
|
||||||
chain.append((result[0], result[1], result[2]))
|
chain.append((result[0], result[1], result[2]))
|
||||||
current_id = result[3]
|
current_uuid = result[3]
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -331,33 +339,33 @@ class TopicReader:
|
|||||||
headings = self.fetch_headings()
|
headings = self.fetch_headings()
|
||||||
result = "Available headings:\n"
|
result = "Available headings:\n"
|
||||||
|
|
||||||
def build_tree(parent_id, level):
|
def build_tree(parent_uuid, level):
|
||||||
tree = ""
|
tree = ""
|
||||||
for id, title, hlevel, parent in headings:
|
for uuid, title, hlevel, parent in headings:
|
||||||
if parent == parent_id:
|
if parent == parent_uuid:
|
||||||
indent = " " * (hlevel - 1)
|
indent = " " * (hlevel - 1)
|
||||||
tree += f"{indent}- {title}\n"
|
tree += f"{indent}- {title}\n"
|
||||||
tree += build_tree(id, hlevel + 1)
|
tree += build_tree(uuid, hlevel + 1)
|
||||||
return tree
|
return tree
|
||||||
|
|
||||||
result += build_tree(None, 1)
|
result += build_tree(None, 1)
|
||||||
return result.strip()
|
return result.strip()
|
||||||
|
|
||||||
def get_topic_content(self, input_title: str) -> Optional[str]:
|
def get_topic_content(self, input_title: str) -> Optional[str]:
|
||||||
heading_id = self.find_closest_heading(input_title)
|
heading_uuid = self.find_closest_heading(input_title)
|
||||||
if heading_id:
|
if heading_uuid:
|
||||||
topic_chain = self.fetch_topic_chain(heading_id)
|
topic_chain = self.fetch_topic_chain(heading_uuid)
|
||||||
result = self.build_full_content(topic_chain[-1][0])
|
result = self.build_full_content(topic_chain[-1][0])
|
||||||
return result
|
return result
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def build_full_content(self, heading_id: int, level_offset: int = 0) -> str:
|
def build_full_content(self, heading_uuid: str, level_offset: int = 0) -> str:
|
||||||
self.db_manager.cursor.execute('''
|
self.db_manager.cursor.execute('''
|
||||||
SELECT h.level, h.title, b.content
|
SELECT h.level, h.title, b.content
|
||||||
FROM headings h
|
FROM headings h
|
||||||
LEFT JOIN body b ON h.id = b.heading_id
|
LEFT JOIN body b ON h.uuid = b.heading_uuid
|
||||||
WHERE h.id = ? AND h.isDeleted = 0
|
WHERE h.uuid = ? AND h.isDeleted = 0
|
||||||
''', (heading_id,))
|
''', (heading_uuid,))
|
||||||
heading = self.db_manager.cursor.fetchone()
|
heading = self.db_manager.cursor.fetchone()
|
||||||
if not heading:
|
if not heading:
|
||||||
return ""
|
return ""
|
||||||
@ -370,17 +378,17 @@ class TopicReader:
|
|||||||
|
|
||||||
# Fetch and process all child headings
|
# Fetch and process all child headings
|
||||||
self.db_manager.cursor.execute('''
|
self.db_manager.cursor.execute('''
|
||||||
SELECT id FROM headings
|
SELECT uuid FROM headings
|
||||||
WHERE parent_id = ? AND isDeleted = 0
|
WHERE parent_uuid = ? AND isDeleted = 0
|
||||||
ORDER BY level, id
|
ORDER BY level, headings_order
|
||||||
''', (heading_id,))
|
''', (heading_uuid,))
|
||||||
children = self.db_manager.cursor.fetchall()
|
children = self.db_manager.cursor.fetchall()
|
||||||
for child in children:
|
for child in children:
|
||||||
result += self.build_full_content(child[0], level_offset)
|
result += self.build_full_content(child[0], level_offset)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def find_closest_heading(self, input_title: str) -> Optional[int]:
|
def find_closest_heading(self, input_title: str) -> Optional[str]:
|
||||||
headings = self.fetch_headings()
|
headings = self.fetch_headings()
|
||||||
if not headings:
|
if not headings:
|
||||||
print("No topics found in the database.")
|
print("No topics found in the database.")
|
||||||
@ -393,9 +401,9 @@ class TopicReader:
|
|||||||
print(f"No close matches found for '{input_title}' (Confidence: {confidence})")
|
print(f"No close matches found for '{input_title}' (Confidence: {confidence})")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
for heading_id, title, _, _ in headings:
|
for heading_uuid, title, _, _ in headings:
|
||||||
if title == closest_match:
|
if title == closest_match:
|
||||||
return heading_id
|
return heading_uuid
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -408,41 +416,57 @@ def compute_file_hash(file_path: str) -> str:
|
|||||||
for chunk in iter(lambda: f.read(4096), b""):
|
for chunk in iter(lambda: f.read(4096), b""):
|
||||||
hash_md5.update(chunk)
|
hash_md5.update(chunk)
|
||||||
return hash_md5.hexdigest()
|
return hash_md5.hexdigest()
|
||||||
|
|
||||||
def generate_calendar(year: int) -> str:
|
def generate_calendar(year: int, db_manager: 'DatabaseManager', document_id: int) -> str:
|
||||||
"""Generate a markdown calendar for the specified year."""
|
"""
|
||||||
|
Generate a markdown calendar for the specified year.
|
||||||
|
"""
|
||||||
calendar_markdown = f"# {year}\n\n"
|
calendar_markdown = f"# {year}\n\n"
|
||||||
current_date = datetime.now().date()
|
current_date = datetime.now().date()
|
||||||
|
|
||||||
|
# Loop through the months
|
||||||
for month in range(1, 13):
|
for month in range(1, 13):
|
||||||
month_name = datetime(year, month, 1).strftime('%B')
|
month_name = datetime(year, month, 1).strftime('%B')
|
||||||
calendar_markdown += f"## {month:02d} / {month_name}\n\n"
|
calendar_markdown += f"## {month:02d} / {month_name}\n\n"
|
||||||
|
|
||||||
# Calculate the number of days in the month
|
# Determine the number of days in the month
|
||||||
num_days = (datetime(year, month + 1, 1) - datetime(year, month, 1)).days if month < 12 else (datetime(year + 1, 1, 1) - datetime(year, month, 1)).days
|
if month == 12:
|
||||||
|
num_days = (datetime(year + 1, 1, 1) - datetime(year, month, 1)).days
|
||||||
# Generate calendar entries for each day
|
else:
|
||||||
|
num_days = (datetime(year, month + 1, 1) - datetime(year, month, 1)).days
|
||||||
|
|
||||||
|
# Create calendar entries for each day in order
|
||||||
for day in range(1, num_days + 1):
|
for day in range(1, num_days + 1):
|
||||||
day_date = datetime(year, month, day).date()
|
day_date = datetime(year, month, day).date()
|
||||||
day_name = day_date.strftime('%a')
|
day_name = day_date.strftime('%a')
|
||||||
|
|
||||||
# Check if this is the current day and make it bold if so
|
# Add bold styling for the current date
|
||||||
if str(day_date) == str(current_date):
|
if str(day_date) == str(current_date):
|
||||||
calendar_markdown += f"**{day:02d} / {day_name}**\n"
|
calendar_markdown += f"**{day:02d} / {day_name}**\n"
|
||||||
else:
|
else:
|
||||||
calendar_markdown += f"{day:02d} / {day_name}\n"
|
calendar_markdown += f"{day:02d} / {day_name}\n"
|
||||||
|
|
||||||
calendar_markdown += "\n" # Add a newline after each month
|
calendar_markdown += '\n'
|
||||||
|
|
||||||
|
# Now parse the markdown and insert into the database
|
||||||
|
#parse_and_insert_markdown(calendar_markdown, db_manager, document_id, year)
|
||||||
|
|
||||||
return calendar_markdown
|
return calendar_markdown
|
||||||
|
|
||||||
def convert_to_html(markdown_content: str) -> str:
|
def convert_to_html(markdown_content: str, heading_uuid: Optional[str] = None) -> str:
|
||||||
"""
|
"""
|
||||||
Convert Markdown content to HTML.
|
Convert Markdown content (or specific section) to HTML.
|
||||||
"""
|
"""
|
||||||
md = MarkdownIt()
|
md = MarkdownIt()
|
||||||
html_content = md.render(markdown_content)
|
|
||||||
|
|
||||||
|
if heading_uuid:
|
||||||
|
# Fetch content for a specific heading and its sub-headings from the database
|
||||||
|
# Example SQL to get heading content based on UUID:
|
||||||
|
# SELECT title, content FROM headings WHERE uuid = ?
|
||||||
|
pass
|
||||||
|
|
||||||
|
html_content = md.render(markdown_content)
|
||||||
|
|
||||||
# Wrap the content in a basic HTML structure
|
# Wrap the content in a basic HTML structure
|
||||||
html_document = f"""
|
html_document = f"""
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
@ -450,7 +474,7 @@ def convert_to_html(markdown_content: str) -> str:
|
|||||||
<head>
|
<head>
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
<title>Calendar</title>
|
<title>Document</title>
|
||||||
<style>
|
<style>
|
||||||
body {{ font-family: Arial, sans-serif; line-height: 1.6; padding: 20px; }}
|
body {{ font-family: Arial, sans-serif; line-height: 1.6; padding: 20px; }}
|
||||||
h1, h2, h3, h4, h5, h6 {{ margin-top: 24px; margin-bottom: 16px; }}
|
h1, h2, h3, h4, h5, h6 {{ margin-top: 24px; margin-bottom: 16px; }}
|
||||||
@ -464,160 +488,99 @@ def convert_to_html(markdown_content: str) -> str:
|
|||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return html_document
|
return html_document
|
||||||
|
|
||||||
|
|
||||||
def bootstrap_calendar(year: int, db_manager: DatabaseManager, markdown_file: str):
|
def bootstrap_calendar(year: int, db_manager: DatabaseManager, markdown_file: str):
|
||||||
"""Generate and store a full year's markdown calendar in the database."""
|
"""Generate and store a full year's markdown calendar in the database using UUIDs."""
|
||||||
calendar_content = generate_calendar(year)
|
document_manager = DocumentManager(db_manager)
|
||||||
|
markdown_processor = MarkdownProcessor(db_manager)
|
||||||
# Write the calendar to the specified markdown file
|
|
||||||
|
# Generate calendar markdown and insert into the database
|
||||||
|
print('## Generating calendar')
|
||||||
|
calendar_content = generate_calendar(year, db_manager, document_id=document_manager.create_document(f"{year} Calendar", markdown_file))
|
||||||
|
|
||||||
|
# Write the calendar to the markdown file
|
||||||
|
print('## Reading the newly created calendar')
|
||||||
with open(markdown_file, 'w', encoding='utf-8') as f:
|
with open(markdown_file, 'w', encoding='utf-8') as f:
|
||||||
f.write(calendar_content)
|
f.write(calendar_content)
|
||||||
|
|
||||||
# Now use the DocumentManager and MarkdownProcessor to read this file into the database
|
# Process the markdown to update or store in the database
|
||||||
document_manager = DocumentManager(db_manager)
|
markdown_processor.process_markdown(markdown_file, document_manager.document_exists(f"{year} Calendar")[0])
|
||||||
document_id = document_manager.create_document(os.path.basename(markdown_file), markdown_file)
|
|
||||||
markdown_processor = MarkdownProcessor(db_manager)
|
|
||||||
markdown_processor.process_markdown(markdown_file, document_id)
|
|
||||||
print(f"Calendar for year {year} has been generated and stored in the database.")
|
print(f"Calendar for year {year} has been generated and stored in the database.")
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""
|
"""
|
||||||
This script processes a markdown file, updates an SQLite database,
|
This script processes a markdown file, updates an SQLite database,
|
||||||
and optionally selects a topic based on user input.
|
and optionally selects a topic based on user input.
|
||||||
|
|
||||||
Initializes managers for database and markdown handling. Updates documents based on
|
|
||||||
MD5 hash changes, and if a topic is provided, retrieves and writes its content
|
|
||||||
to the markdown file.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
-m, --markdown: Path to markdown file (default: 'calendar.md').
|
|
||||||
-d, --database: Path to SQLite database file (default: 'markdown.db').
|
|
||||||
topic_title: Optional topic for content selection (fuzzy matching enabled).
|
|
||||||
--bootstrap: If provided, generates markdown calendar for the current year and loads it to the database.
|
|
||||||
--ls: If provided, lists all available headings.
|
|
||||||
--html: If provided, produces a {filename}.html file alongside the markdown file.
|
|
||||||
"""
|
"""
|
||||||
# Set up command-line argument parsing
|
|
||||||
parser = argparse.ArgumentParser(description='Process markdown file and optionally select a topic.')
|
parser = argparse.ArgumentParser(description='Process markdown file and optionally select a topic.')
|
||||||
parser.add_argument('-m', '--markdown', type=str, default='calendar.md', help='Input/output markdown file (default: calendar.md)')
|
parser.add_argument('-m', '--markdown', type=str, default='calendar.md', help='Input/output markdown file (default: calendar.md)')
|
||||||
parser.add_argument('-d', '--database', type=str, default='markdown.db', help='SQLite database file (default: markdown.db)')
|
parser.add_argument('-d', '--database', type=str, default='markdown.db', help='SQLite database file (default: markdown.db)')
|
||||||
parser.add_argument('topic_title', nargs='?', type=str, help='Topic title to select (fuzzy matching enabled)')
|
parser.add_argument('topic_title', nargs='?', type=str, help='Topic title to select (fuzzy matching enabled)')
|
||||||
parser.add_argument('--bootstrap', action='store_true', help='Generate markdown calendar for the current year and load it to the database.')
|
parser.add_argument('--bootstrap', action='store_true', help='Generate markdown calendar for the current year and load it to the database.')
|
||||||
parser.add_argument('--ls', action='store_true', help='List all available headings.')
|
parser.add_argument('--ls', action='store_true', help='List all available headings.')
|
||||||
parser.add_argument('--html', action='store_true', help='Generate an HTML version of the output')
|
parser.add_argument('--html', action='store_true', help='Generate an HTML version of the output.')
|
||||||
|
parser.add_argument('--uuid', type=str, help='Specify a UUID to retrieve content.')
|
||||||
parser.add_argument('--debug', action='store_true', help='Enable debug printing')
|
parser.add_argument('--debug', action='store_true', help='Enable debug printing')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# Set up logging
|
# Setup basic logging
|
||||||
logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
|
logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
|
||||||
|
|
||||||
|
# Check for markdown file presence
|
||||||
# Use the provided or default file paths
|
if not os.path.exists(args.markdown) and not args.bootstrap:
|
||||||
markdown_file = args.markdown
|
print(f"Error: Markdown file '{args.markdown}' not found. Use --bootstrap to create a new calendar.")
|
||||||
database_file = args.database
|
db_manager.close()
|
||||||
|
return
|
||||||
# Initialize manager objects for database operations
|
# Check for database file presence
|
||||||
db_manager = DatabaseManager(database_file)
|
if not os.path.exists(args.database) and not args.bootstrap:
|
||||||
|
print(f"Error: Database file '{args.database}' not found. Use --bootstrap to create a new calendar.")
|
||||||
if args.bootstrap:
|
|
||||||
bootstrap_calendar(datetime.now().year, db_manager, markdown_file)
|
|
||||||
db_manager.close()
|
db_manager.close()
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Initialize manager objects
|
||||||
|
db_manager = DatabaseManager(args.database)
|
||||||
|
|
||||||
|
if args.bootstrap:
|
||||||
|
print('## Running calendar bootstrap')
|
||||||
|
bootstrap_calendar(datetime.now().year, db_manager, args.markdown)
|
||||||
|
db_manager.close()
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
document_manager = DocumentManager(db_manager)
|
||||||
|
|
||||||
if args.ls:
|
if args.ls:
|
||||||
topic_reader = TopicReader(db_manager)
|
topic_reader = TopicReader(db_manager)
|
||||||
print(topic_reader.list_headings())
|
print(topic_reader.list_headings())
|
||||||
db_manager.close()
|
db_manager.close()
|
||||||
return
|
return
|
||||||
|
|
||||||
# Check if the markdown file exists
|
# Topic or UUID-based content retrieval
|
||||||
if not os.path.exists(markdown_file):
|
if args.topic_title or args.uuid:
|
||||||
print(f"Error: Markdown file '{markdown_file}' not found. Use --bootstrap to create a new calendar.")
|
|
||||||
db_manager.close()
|
|
||||||
return
|
|
||||||
|
|
||||||
document_manager = DocumentManager(db_manager)
|
|
||||||
markdown_processor = MarkdownProcessor(db_manager)
|
|
||||||
|
|
||||||
# Get the base name of the markdown file (without path)
|
|
||||||
document_name = os.path.basename(markdown_file)
|
|
||||||
|
|
||||||
# Check if the document already exists in the database
|
|
||||||
document = db_manager.cursor.execute('SELECT id, file_path, md5sum, updated_timestamp FROM documents WHERE name = ?', (document_name,)).fetchone()
|
|
||||||
|
|
||||||
# Compute the current MD5 hash of the markdown file
|
|
||||||
current_file_hash = compute_file_hash(markdown_file)
|
|
||||||
|
|
||||||
if document:
|
|
||||||
# If the document exists in the database
|
|
||||||
document_id, stored_file_path, stored_md5sum, last_updated = document
|
|
||||||
|
|
||||||
if stored_file_path != markdown_file:
|
|
||||||
print(f"Updating file path for '{document_name}' in the database...")
|
|
||||||
document_manager.update_document(document_id, file_path=markdown_file)
|
|
||||||
|
|
||||||
if stored_md5sum != current_file_hash:
|
|
||||||
# If the file has changed since last update
|
|
||||||
print(f"File '{document_name}' has changed. Updating the database...")
|
|
||||||
document_manager.update_document_hash(document_id, current_file_hash)
|
|
||||||
markdown_processor.process_markdown(markdown_file, document_id)
|
|
||||||
else:
|
|
||||||
# If the file hasn't changed
|
|
||||||
print(f"File '{document_name}' has not changed. Skipping update.")
|
|
||||||
else:
|
|
||||||
# If the document doesn't exist in the database
|
|
||||||
print(f"Document '{document_name}' not found in the database. Adding new entry...")
|
|
||||||
document_id = document_manager.create_document(document_name, markdown_file)
|
|
||||||
document_manager.update_document_hash(document_id, current_file_hash)
|
|
||||||
markdown_processor.process_markdown(markdown_file, document_id)
|
|
||||||
|
|
||||||
# Check if a topic title argument is provided
|
|
||||||
if args.topic_title:
|
|
||||||
# Initialize TopicReader
|
|
||||||
topic_reader = TopicReader(db_manager)
|
topic_reader = TopicReader(db_manager)
|
||||||
|
|
||||||
# Retrieve the content for the specified topic
|
if args.uuid:
|
||||||
result = topic_reader.get_topic_content(args.topic_title)
|
content = topic_reader.build_full_content(args.uuid)
|
||||||
|
|
||||||
if result:
|
|
||||||
# If content is found, write it back to the original markdown file
|
|
||||||
with open(markdown_file, 'w', encoding='utf-8') as file:
|
|
||||||
file.write(result)
|
|
||||||
file.write('\n')
|
|
||||||
print(f"Selected topic and subtopics written to {markdown_file}")
|
|
||||||
|
|
||||||
# Generate HTML if --html option is specified
|
|
||||||
if args.html:
|
|
||||||
html_file = os.path.splitext(markdown_file)[0] + '.html'
|
|
||||||
html_content = convert_to_html(result)
|
|
||||||
with open(html_file, 'w', encoding='utf-8') as file:
|
|
||||||
file.write(html_content)
|
|
||||||
print(f"HTML version written to {html_file}")
|
|
||||||
|
|
||||||
# Update the document hash in the database
|
|
||||||
new_file_hash = compute_file_hash(markdown_file)
|
|
||||||
document_manager.update_document_hash(document_id, new_file_hash)
|
|
||||||
else:
|
else:
|
||||||
# If no content is found
|
content = topic_reader.get_topic_content(args.topic_title)
|
||||||
print("No result to write. The original file remains unchanged.")
|
|
||||||
else:
|
|
||||||
print("No topic title provided. The database has been updated/added without modifying the file.")
|
|
||||||
|
|
||||||
# Generate HTML for the entire document if --html option is specified
|
if content:
|
||||||
|
# Write the selected content to the markdown file
|
||||||
|
with open(args.markdown, 'w', encoding='utf-8') as file:
|
||||||
|
file.write(content)
|
||||||
|
file.write('\n')
|
||||||
|
print(f"Selected content written to {args.markdown}")
|
||||||
|
|
||||||
|
# Optionally convert to HTML
|
||||||
if args.html:
|
if args.html:
|
||||||
with open(markdown_file, 'r', encoding='utf-8') as file:
|
html_file = f"{args.markdown}.html"
|
||||||
markdown_content = file.read()
|
|
||||||
html_file = os.path.splitext(markdown_file)[0] + '.html'
|
|
||||||
html_content = convert_to_html(markdown_content)
|
|
||||||
with open(html_file, 'w', encoding='utf-8') as file:
|
with open(html_file, 'w', encoding='utf-8') as file:
|
||||||
file.write(html_content)
|
file.write(convert_to_html(content))
|
||||||
print(f"HTML version of the entire document written to {html_file}")
|
print(f"HTML version written to {html_file}")
|
||||||
|
|
||||||
# Close the database connection
|
|
||||||
db_manager.close()
|
db_manager.close()
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
Loading…
Reference in New Issue
Block a user