Trying to make sense of the MarkdownProcessor...

2024-10-05 20:59:32 +03:00 · 2024-10-05 20:59:32 +03:00 · b94502be9d
commit b94502be9d
parent 3d35990b3e
1 changed file with 291 additions and 220 deletions
--- a/markdown_sqlite.py
+++ b/markdown_sqlite.py
@@ -43,29 +43,33 @@ class DatabaseManager:

            CREATE TABLE IF NOT EXISTS headings (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                uuid TEXT NOT NULL UNIQUE,
                level INTEGER NOT NULL,
                title TEXT NOT NULL,
-                parent_id INTEGER,
+                parent_uuid TEXT,
                document_id INTEGER NOT NULL,
+                path TEXT NOT NULL,
+                headings_order INTEGER,
                added_timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                updated_timestamp DATETIME,
                deleted_timestamp DATETIME,
                isDeleted BOOLEAN DEFAULT 0,
-                FOREIGN KEY (parent_id) REFERENCES headings(id),
+                FOREIGN KEY (parent_uuid) REFERENCES headings(uuid),
                FOREIGN KEY (document_id) REFERENCES documents(id)
            );

            CREATE TABLE IF NOT EXISTS body (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                uuid TEXT NOT NULL UNIQUE,
                content TEXT,
-                heading_id INTEGER NOT NULL,
+                heading_uuid TEXT NOT NULL,
                document_id INTEGER NOT NULL,
                md5sum TEXT,
                added_timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                updated_timestamp DATETIME,
                deleted_timestamp DATETIME,
                isDeleted BOOLEAN DEFAULT 0,
-                FOREIGN KEY (heading_id) REFERENCES headings(id),
+                FOREIGN KEY (heading_uuid) REFERENCES headings(uuid),
                FOREIGN KEY (document_id) REFERENCES documents(id)
            );
        ''')
@@ -83,12 +87,6 @@ class DocumentManager:
    def document_exists(self, document_name: str) -> Optional[Tuple[int]]:
        """
        Check if a document exists in the database.
-
-        Args:
-            document_name: Name of the document to check.
-
-        Returns:
-            Document ID if it exists, None otherwise.
        """
        self.db_manager.cursor.execute('SELECT id FROM documents WHERE name = ?', (document_name,))
        return self.db_manager.cursor.fetchone()
@@ -119,7 +117,7 @@ class DocumentManager:

    def soft_delete_document(self, document_id: int) -> None:
        """Soft delete a document by marking it as deleted in the database."""
-        logging.debug(f"** This now soft deleted, document_id: {document_id}")
+        logging.debug(f"** This document is now soft deleted, document_id: {document_id}")
        now: str = datetime.now().isoformat()
        self.db_manager.cursor.execute('''
            UPDATE documents SET isDeleted = 1, deleted_timestamp = ? WHERE id = ?
@@ -146,6 +144,7 @@ class MarkdownProcessor:
        md = MarkdownIt()
        tokens = md.parse(markdown_text)
        
+        print('### Calling update_document_content')
        self.update_document_content(tokens, document_id)

    def read_markdown_file(self, file_path: str) -> str:
@@ -153,40 +152,44 @@ class MarkdownProcessor:
            return file.read()

    def update_document_content(self, tokens: List, document_id: int) -> None:
+        existing_structure = {}
        existing_structure = self.get_existing_document_structure(document_id)
-        new_structure = self.parse_new_structure(tokens)
-        
+        new_structure = self.parse_new_structure(tokens, document_id, existing_structure)
+        print('### Calling merge_structures...')
        self.merge_structures(existing_structure, new_structure, document_id)

+            
    def get_existing_document_structure(self, document_id: int) -> Dict:
        structure = {}
        self.db_manager.cursor.execute('''
-            SELECT h.id, h.level, h.title, h.parent_id, b.content
+            SELECT h.uuid, h.level, h.title, h.parent_uuid, h.path, b.content, b.uuid
            FROM headings h
-            LEFT JOIN body b ON h.id = b.heading_id
+            LEFT JOIN body b ON h.uuid = b.heading_uuid
            WHERE h.document_id = ? AND h.isDeleted = 0
            ORDER BY h.level, h.id
        ''', (document_id,))
-        for heading_id, level, title, parent_id, content in self.db_manager.cursor.fetchall():
-            structure[heading_id] = {
-                'id': heading_id,  # Add this line to include the id in each node
+        for heading_uuid, level, title, parent_uuid, path, content, body_uuid in self.db_manager.cursor.fetchall():
+            structure[heading_uuid] = {
+                'uuid': heading_uuid,
                'level': level,
                'title': title,
-                'parent_id': parent_id,
+                'parent_uuid': parent_uuid,
+                'path': path,
                'content': content,
+                'body_uuid': body_uuid,
                'children': []
            }
        # Build the tree structure
-        for id, node in structure.items():
-            if node['parent_id'] in structure:
-                structure[node['parent_id']]['children'].append(id)
+        for uuid, node in structure.items():
+            if node['parent_uuid'] in structure:
+                structure[node['parent_uuid']]['children'].append(uuid)
        return structure 

-    def parse_new_structure(self, tokens: List) -> Dict:
+    def parse_new_structure(self, tokens: List, document_id: int, existing_structure: Dict) -> Dict:
        structure = {}
        current_heading = None
        current_content = []
-        parent_stack = [{"id": None, "level": 0}]
+        parent_stack = [{"uuid": None, "level": 0, "path": ""}]
    
        for token in tokens:
            if token.type == 'heading_open':
@@ -195,21 +198,41 @@ class MarkdownProcessor:
                level = int(token.tag.strip('h'))
                while parent_stack[-1]['level'] >= level:
                    parent_stack.pop()
-                current_heading = str(uuid.uuid4())  # Generate a temporary ID
+    
+                parent_path = parent_stack[-1]['path']
+                current_heading = str(uuid.uuid4())  # Always assign a new UUID here, may change later
+                
                structure[current_heading] = {
+                    'uuid': current_heading,
                    'level': level,
                    'title': '',
-                    'parent_id': parent_stack[-1]['id'],
+                    'parent_uuid': parent_stack[-1]['uuid'],
+                    'path': f"{parent_path}/{current_heading}" if parent_path else current_heading,
                    'content': '',
                    'children': []
                }
-                parent_stack.append({"id": current_heading, "level": level})
+                parent_stack.append({"uuid": current_heading, "level": level, "path": structure[current_heading]['path']})
                current_content = []
+    
            elif token.type == 'heading_close':
                structure[current_heading]['content'] = ''.join(current_content).strip()
+    
            elif token.type == 'inline' and current_heading:
                if structure[current_heading]['title'] == '':
+                    # Populate the title
                    structure[current_heading]['title'] = token.content
+                    
+                    # Now check for existing UUID based on title, level, and parent
+                    existing_uuid = next(
+                        (uuid for uuid, node in existing_structure.items()
+                         if node['title'] == structure[current_heading]['title']
+                         and node['level'] == structure[current_heading]['level']
+                         and node['parent_uuid'] == structure[current_heading]['parent_uuid']), None)
+                    
+                    if existing_uuid:
+                        # If found in existing structure, replace the new UUID
+                        structure[existing_uuid] = structure.pop(current_heading)
+                        current_heading = existing_uuid
                else:
                    current_content.append(token.content)
            elif current_heading:
@@ -221,17 +244,45 @@ class MarkdownProcessor:
        return structure

    def merge_structures(self, existing: Dict, new: Dict, document_id: int) -> None:
-        def merge_recursive(existing_node, new_node, parent_id):
+        logging.info(f"Starting merge_structures for document_id: {document_id}")
+    
+        def merge_recursive(existing_node, new_node, parent_uuid):
+            logging.debug(f"Processing node: {new_node['title']}")
+    
            if not existing_node:
-                # This is a new node, insert it
-                heading_id = self.insert_heading(new_node['level'], new_node['title'], parent_id, document_id)
-                self.insert_body(new_node['content'], heading_id, document_id)
+                logging.debug(f"No existing node found for {new_node['title']}")
+                # Check if a heading with the same title already exists at this level
+                self.db_manager.cursor.execute('''
+                    SELECT uuid FROM headings 
+                    WHERE title = ? AND level = ? AND parent_uuid = ? AND document_id = ? AND isDeleted = 0
+                    ''', (new_node['title'], new_node['level'], parent_uuid, document_id))
+                existing_uuid = self.db_manager.cursor.fetchone()
+                
+                if existing_uuid:
+                    heading_uuid = existing_uuid[0]
+                    logging.info(f"Updating existing heading: {new_node['title']} (UUID: {heading_uuid})")
+                    self.update_heading(heading_uuid, new_node['title'], new_node['level'], parent_uuid, new_node['path'])
+                else:
+                    logging.info(f"Inserting new heading: {new_node['title']}")
+                    heading_uuid = self.insert_heading(new_node['level'], new_node['title'], parent_uuid, document_id, new_node['path'])
+                
+                if new_node['content']:
+                    logging.debug(f"Inserting body content for heading: {new_node['title']}")
+                    body_uuid = self.insert_body(new_node['content'], heading_uuid, document_id)
+                
                for child in new_node['children']:
-                    merge_recursive(None, new[child], heading_id)
+                    merge_recursive(None, new[child], heading_uuid)
            else:
+                logging.debug(f"Updating existing node: {existing_node['title']}")
                # Update existing node
-                self.update_heading(existing_node['id'], new_node['title'], new_node['level'], parent_id)
-                self.update_body(existing_node['id'], new_node['content'], document_id)
+                self.update_heading(existing_node['uuid'], new_node['title'], new_node['level'], parent_uuid, new_node['path'])
+                if new_node['content']:
+                    if existing_node['body_uuid']:
+                        logging.debug(f"Updating body content for heading: {existing_node['title']}")
+                        self.update_body(existing_node['body_uuid'], new_node['content'], document_id)
+                    else:
+                        logging.debug(f"Inserting new body content for existing heading: {existing_node['title']}")
+                        self.insert_body(new_node['content'], existing_node['uuid'], document_id)
                
                # Process children
                existing_children = {child['title']: child for child in existing_node['children']}
@@ -239,89 +290,125 @@ class MarkdownProcessor:
                
                for title, child in new_children.items():
                    if title in existing_children:
-                        merge_recursive(existing_children[title], child, existing_node['id'])
+                        merge_recursive(existing_children[title], child, existing_node['uuid'])
                    else:
-                        merge_recursive(None, child, existing_node['id'])
+                        merge_recursive(None, child, existing_node['uuid'])
                
                for title, child in existing_children.items():
                    if title not in new_children:
-                        self.soft_delete_heading(child['id'])
+                        logging.info(f"Soft deleting heading: {child['title']}")
+                        self.soft_delete_heading(child['uuid'])
    
        for new_root in new.values():
-            existing_root = next((node for node in existing.values() if node['title'] == new_root['title']), None)
+            logging.info(f"Processing root node: {new_root['title']}")
+            existing_root = next((node for node in existing.values() if node['path'] == new_root['path']), None)
            merge_recursive(existing_root, new_root, None)
    
-    def insert_heading(self, level: int, title: str, parent_id: Optional[int], document_id: int) -> int:
-        self.db_manager.cursor.execute('''
-            INSERT INTO headings (level, title, parent_id, document_id)
-            VALUES (?, ?, ?, ?)
-        ''', (level, title, parent_id, document_id))
-        return self.db_manager.cursor.lastrowid
+        logging.info("Merge structures completed")

-    def update_heading(self, heading_id: int, title: str, level: int, parent_id: Optional[int]) -> None:
+    def insert_heading(self, level: int, title: str, parent_uuid: Optional[str], document_id: int, path: str) -> str:
+        heading_uuid = str(uuid.uuid4())
+        self.db_manager.cursor.execute('''
+            INSERT INTO headings (uuid, level, title, parent_uuid, document_id, path)
+            VALUES (?, ?, ?, ?, ?, ?)
+        ''', (heading_uuid, level, title, parent_uuid, document_id, path))
+        return heading_uuid
+
+    def update_heading(self, heading_uuid: str, title: str, level: int, parent_uuid: Optional[str], path: str) -> None:
        self.db_manager.cursor.execute('''
            UPDATE headings
-            SET title = ?, level = ?, parent_id = ?, updated_timestamp = CURRENT_TIMESTAMP
-            WHERE id = ?
-        ''', (title, level, parent_id, heading_id))
+            SET title = ?, level = ?, parent_uuid = ?, path = ?, updated_timestamp = CURRENT_TIMESTAMP
+            WHERE uuid = ?
+        ''', (title, level, parent_uuid, path, heading_uuid))

-    def insert_body(self, content: str, heading_id: int, document_id: int) -> None:
+    def insert_body(self, content: str, heading_uuid: str, document_id: int) -> str:
+        body_uuid = str(uuid.uuid4())
        md5sum = hashlib.md5(content.encode()).hexdigest()
-        self.db_manager.cursor.execute('''
-            INSERT INTO body (content, heading_id, document_id, md5sum)
-            VALUES (?, ?, ?, ?)
-        ''', (content, heading_id, document_id, md5sum))
+        print(f"###### Trying to insert body text with md5sum of: {md5sum} to uuid: {body_uuid}, with content: \n{content}\n")
        
-    def update_body(self, heading_id: int, content: str, document_id: int) -> None:
+        # Verify input parameters
+        if not all([content, heading_uuid, document_id]):
+            raise ValueError("Missing required parameters for insert_body")
+    
+        try:
+            # Check if heading_uuid exists
+            self.db_manager.cursor.execute("SELECT 1 FROM headings WHERE uuid = ?", (heading_uuid,))
+            if not self.db_manager.cursor.fetchone():
+                raise ValueError(f"heading_uuid {heading_uuid} does not exist in headings table")
+    
+            # Check if document_id exists
+            self.db_manager.cursor.execute("SELECT 1 FROM documents WHERE id = ?", (document_id,))
+            if not self.db_manager.cursor.fetchone():
+                raise ValueError(f"document_id {document_id} does not exist in documents table")
+    
+            # Insert the body
+            self.db_manager.cursor.execute('''
+                INSERT INTO body (uuid, content, heading_uuid, document_id, md5sum)
+                VALUES (?, ?, ?, ?, ?)
+            ''', (body_uuid, content, heading_uuid, document_id, md5sum))
+            
+            self.db_manager.conn.commit()
+            print(f"###### Successfully inserted body with uuid: {body_uuid}")
+        except sqlite3.Error as e:
+            print(f"An error occurred while inserting body: {e}")
+            self.db_manager.conn.rollback()
+            raise
+        except ValueError as e:
+            print(f"Validation error: {e}")
+            raise
+    
+        return body_uuid
+
+    def update_body(self, body_uuid: str, content: str, document_id: int) -> None:
        md5sum = hashlib.md5(content.encode()).hexdigest()
        self.db_manager.cursor.execute('''
            UPDATE body
            SET content = ?, md5sum = ?, updated_timestamp = CURRENT_TIMESTAMP
-            WHERE heading_id = ? AND document_id = ?
-        ''', (content, md5sum, heading_id, document_id))
+            WHERE uuid = ? AND document_id = ?
+        ''', (content, md5sum, body_uuid, document_id))

-    def soft_delete_heading(self, heading_id: int) -> None:
+    def soft_delete_heading(self, heading_uuid: str) -> None:
        now = datetime.now().isoformat()
        self.db_manager.cursor.execute('''
            UPDATE headings
            SET isDeleted = 1, deleted_timestamp = ?
-            WHERE id = ?
-        ''', (now, heading_id))
+            WHERE uuid = ?
+        ''', (now, heading_uuid))
        # Also soft delete associated body content
        self.db_manager.cursor.execute('''
            UPDATE body
            SET isDeleted = 1, deleted_timestamp = ?
-            WHERE heading_id = ?
-        ''', (now, heading_id))
+            WHERE heading_uuid = ?
+        ''', (now, heading_uuid))

 class TopicReader:
    """Reads and retrieves topics from the database."""
    def __init__(self, db_manager: 'DatabaseManager'):
        self.db_manager = db_manager

-    def fetch_headings(self) -> List[Tuple[int, str, int, Optional[int]]]:
+    def fetch_headings(self) -> List[Tuple[str, str, int, Optional[str]]]:
        self.db_manager.cursor.execute('''
-            SELECT id, title, level, parent_id 
+            SELECT uuid, title, level, parent_uuid 
            FROM headings 
            WHERE isDeleted = 0 
-            ORDER BY level, id
+            ORDER BY level, headings_order
        ''')
        return self.db_manager.cursor.fetchall()

-    def fetch_topic_chain(self, heading_id: int) -> List[Tuple[int, str, int]]:
+    def fetch_topic_chain(self, heading_uuid: str) -> List[Tuple[str, str, int]]:
        chain = []
-        current_id = heading_id
+        current_uuid = heading_uuid

-        while current_id is not None:
+        while current_uuid is not None:
            self.db_manager.cursor.execute('''
-                SELECT id, title, level, parent_id 
+                SELECT uuid, title, level, parent_uuid 
                FROM headings 
-                WHERE id = ?
-            ''', (current_id,))
+                WHERE uuid = ?
+            ''', (current_uuid,))
            result = self.db_manager.cursor.fetchone()
            if result:
                chain.append((result[0], result[1], result[2]))
-                current_id = result[3]
+                current_uuid = result[3]
            else:
                break

@@ -331,33 +418,33 @@ class TopicReader:
        headings = self.fetch_headings()
        result = "Available headings:\n"
        
-        def build_tree(parent_id, level):
+        def build_tree(parent_uuid, level):
            tree = ""
-            for id, title, hlevel, parent in headings:
-                if parent == parent_id:
+            for uuid, title, hlevel, parent in headings:
+                if parent == parent_uuid:
                    indent = "  " * (hlevel - 1)
                    tree += f"{indent}- {title}\n"
-                    tree += build_tree(id, hlevel + 1)
+                    tree += build_tree(uuid, hlevel + 1)
            return tree
        
        result += build_tree(None, 1)
        return result.strip()

    def get_topic_content(self, input_title: str) -> Optional[str]:
-        heading_id = self.find_closest_heading(input_title)
-        if heading_id:
-            topic_chain = self.fetch_topic_chain(heading_id)
+        heading_uuid = self.find_closest_heading(input_title)
+        if heading_uuid:
+            topic_chain = self.fetch_topic_chain(heading_uuid)
            result = self.build_full_content(topic_chain[-1][0])
            return result
        return None

-    def build_full_content(self, heading_id: int, level_offset: int = 0) -> str:
+    def build_full_content(self, heading_uuid: str, level_offset: int = 0) -> str:
        self.db_manager.cursor.execute('''
            SELECT h.level, h.title, b.content
            FROM headings h
-            LEFT JOIN body b ON h.id = b.heading_id
-            WHERE h.id = ? AND h.isDeleted = 0
-        ''', (heading_id,))
+            LEFT JOIN body b ON h.uuid = b.heading_uuid
+            WHERE h.uuid = ? AND h.isDeleted = 0
+        ''', (heading_uuid,))
        heading = self.db_manager.cursor.fetchone()
        if not heading:
            return ""
@@ -370,17 +457,17 @@ class TopicReader:

        # Fetch and process all child headings
        self.db_manager.cursor.execute('''
-            SELECT id FROM headings
-            WHERE parent_id = ? AND isDeleted = 0
-            ORDER BY level, id
-        ''', (heading_id,))
+            SELECT uuid FROM headings
+            WHERE parent_uuid = ? AND isDeleted = 0
+            ORDER BY level, headings_order
+        ''', (heading_uuid,))
        children = self.db_manager.cursor.fetchall()
        for child in children:
            result += self.build_full_content(child[0], level_offset)

        return result

-    def find_closest_heading(self, input_title: str) -> Optional[int]:
+    def find_closest_heading(self, input_title: str) -> Optional[str]:
        headings = self.fetch_headings()
        if not headings:
            print("No topics found in the database.")
@@ -393,9 +480,9 @@ class TopicReader:
            print(f"No close matches found for '{input_title}' (Confidence: {confidence})")
            return None

-        for heading_id, title, _, _ in headings:
+        for heading_uuid, title, _, _ in headings:
            if title == closest_match:
-                return heading_id
+                return heading_uuid

        return None

@@ -409,38 +496,54 @@ def compute_file_hash(file_path: str) -> str:
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
    
-def generate_calendar(year: int) -> str:
-    """Generate a markdown calendar for the specified year."""
+def generate_calendar(year: int, db_manager: 'DatabaseManager', document_id: int) -> str:
+    """
+    Generate a markdown calendar for the specified year.
+    """
    calendar_markdown = f"# {year}\n\n"
    current_date = datetime.now().date()

+    # Loop through the months
    for month in range(1, 13):
        month_name = datetime(year, month, 1).strftime('%B')
        calendar_markdown += f"## {month:02d} / {month_name}\n\n"

-        # Calculate the number of days in the month
-        num_days = (datetime(year, month + 1, 1) - datetime(year, month, 1)).days if month < 12 else (datetime(year + 1, 1, 1) - datetime(year, month, 1)).days
+        # Determine the number of days in the month
+        if month == 12:
+            num_days = (datetime(year + 1, 1, 1) - datetime(year, month, 1)).days
+        else:
+            num_days = (datetime(year, month + 1, 1) - datetime(year, month, 1)).days

-        # Generate calendar entries for each day
+        # Create calendar entries for each day in order
        for day in range(1, num_days + 1):
            day_date = datetime(year, month, day).date()
            day_name = day_date.strftime('%a')

-            # Check if this is the current day and make it bold if so
+            # Add bold styling for the current date
            if str(day_date) == str(current_date):
                calendar_markdown += f"**{day:02d} / {day_name}**\n"
            else:
                calendar_markdown += f"{day:02d} / {day_name}\n"

-        calendar_markdown += "\n"  # Add a newline after each month
+        calendar_markdown += '\n'
+
+    # Now parse the markdown and insert into the database
+    #parse_and_insert_markdown(calendar_markdown, db_manager, document_id, year)
    
    return calendar_markdown
    
-def convert_to_html(markdown_content: str) -> str:
+def convert_to_html(markdown_content: str, heading_uuid: Optional[str] = None) -> str:
    """
-    Convert Markdown content to HTML.
+    Convert Markdown content (or specific section) to HTML.
    """
    md = MarkdownIt()
+    
+    if heading_uuid:
+        # Fetch content for a specific heading and its sub-headings from the database
+        # Example SQL to get heading content based on UUID:
+        # SELECT title, content FROM headings WHERE uuid = ?
+        pass
+
    html_content = md.render(markdown_content)

    # Wrap the content in a basic HTML structure
@@ -450,7 +553,7 @@ def convert_to_html(markdown_content: str) -> str:
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
-        <title>Calendar</title>
+        <title>Document</title>
        <style>
            body {{ font-family: Arial, sans-serif; line-height: 1.6; padding: 20px; }}
            h1, h2, h3, h4, h5, h6 {{ margin-top: 24px; margin-bottom: 16px; }}
@@ -467,65 +570,94 @@ def convert_to_html(markdown_content: str) -> str:

    return html_document

-
 def bootstrap_calendar(year: int, db_manager: DatabaseManager, markdown_file: str):
-    """Generate and store a full year's markdown calendar in the database."""
-    calendar_content = generate_calendar(year)
+    """Generate and store a full year's markdown calendar in the database using UUIDs."""
+    document_manager = DocumentManager(db_manager)
+    markdown_processor = MarkdownProcessor(db_manager)

-    # Write the calendar to the specified markdown file
+    print('## Generating calendar')
+    document_id = document_manager.create_document(f"{year} Calendar", markdown_file)
+    calendar_content = generate_calendar(year, db_manager, document_id=document_id)
+
+    print('## Writing the newly created calendar')
    with open(markdown_file, 'w', encoding='utf-8') as f:
        f.write(calendar_content)

-    # Now use the DocumentManager and MarkdownProcessor to read this file into the database
-    document_manager = DocumentManager(db_manager)
-    document_id = document_manager.create_document(os.path.basename(markdown_file), markdown_file)
-    markdown_processor = MarkdownProcessor(db_manager)
+    # Process the markdown to update or store in the database
    markdown_processor.process_markdown(markdown_file, document_id)
+
+    # Calculate and store the hash
+    current_file_hash = compute_file_hash(markdown_file)
+    document_manager.update_document_hash(document_id, current_file_hash)
+
    print(f"Calendar for year {year} has been generated and stored in the database.")

 def main():
    """
    This script processes a markdown file, updates an SQLite database,
    and optionally selects a topic based on user input.
-
-    Initializes managers for database and markdown handling. Updates documents based on 
-    MD5 hash changes, and if a topic is provided, retrieves and writes its content 
-    to the markdown file.
-
-    Args:
-        -m, --markdown: Path to markdown file (default: 'calendar.md').
-        -d, --database: Path to SQLite database file (default: 'markdown.db').
-        topic_title: Optional topic for content selection (fuzzy matching enabled).
-        --bootstrap: If provided, generates markdown calendar for the current year and loads it to the database.
-        --ls: If provided, lists all available headings.
-        --html: If provided, will produce {filename}.html file along the markdown file.
    """
-    # Set up command-line argument parsing
    parser = argparse.ArgumentParser(description='Process markdown file and optionally select a topic.')
    parser.add_argument('-m', '--markdown', type=str, default='calendar.md', help='Input/output markdown file (default: calendar.md)')
    parser.add_argument('-d', '--database', type=str, default='markdown.db', help='SQLite database file (default: markdown.db)')
    parser.add_argument('topic_title', nargs='?', type=str, help='Topic title to select (fuzzy matching enabled)')
    parser.add_argument('--bootstrap', action='store_true', help='Generate markdown calendar for the current year and load it to the database.')
    parser.add_argument('--ls', action='store_true', help='List all available headings.')
-    parser.add_argument('--html', action='store_true', help='Generate an HTML version of the output')
+    parser.add_argument('--html', action='store_true', help='Generate an HTML version of the output.')
+    parser.add_argument('--uuid', type=str, help='Specify a UUID to retrieve content.')
    parser.add_argument('--debug', action='store_true', help='Enable debug printing')
    args = parser.parse_args()

-    # Set up logging
+    # Setup basic logging
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

-
-    # Use the provided or default file paths
-    markdown_file = args.markdown
-    database_file = args.database
-
-    # Initialize manager objects for database operations
-    db_manager = DatabaseManager(database_file)
-
-    if args.bootstrap:
-        bootstrap_calendar(datetime.now().year, db_manager, markdown_file)
+    # Check for markdown file presence
+    if not os.path.exists(args.markdown) and not args.bootstrap:
+        print(f"Error: Markdown file '{args.markdown}' not found. Use --bootstrap to create a new calendar.")
        db_manager.close()
        return
+    # Check for database file presence
+    if not os.path.exists(args.database) and not args.bootstrap:
+        print(f"Error: Database file '{args.database}' not found. Use --bootstrap to create a new calendar.")
+        db_manager.close()
+        return
+
+    # Initialize manager objects
+    db_manager = DatabaseManager(args.database)
+    # Initialize the MarkdownProcessor with the db_manager
+    markdown_processor = MarkdownProcessor(db_manager)
+
+    if args.bootstrap:
+        print('## Running calendar bootstrap')
+        bootstrap_calendar(datetime.now().year, db_manager, args.markdown)
+        db_manager.close()
+        return
+        
+    document_manager = DocumentManager(db_manager)
+
+    # Get the base name of the markdown file (without path)
+    document_name = os.path.basename(args.markdown)
+    # Check if file with same name has been uploaded before
+    document_id = document_manager.document_exists(document_name)
+    # Compute the current MD5 hash of the markdown file
+    current_file_hash = compute_file_hash(args.markdown)
+
+    if document_id: 
+        # Check if current_file_hash is already in database
+        db_manager.cursor.execute("SELECT md5sum FROM documents WHERE id = ?", (document_id[0],))
+        result = db_manager.cursor.fetchone()
+        
+        if result is None or result[0] != current_file_hash:
+            print("File has changed or hash not found. Processing...")
+            markdown_processor.process_markdown(args.markdown, document_id[0])
+            document_manager.update_document_hash(document_id[0], current_file_hash)
+        else:
+            print("No changes detected in the file.")
+    else:
+        print("Document does not exist, creating new entry.")
+        new_document_id = document_manager.create_document(document_name, args.markdown)
+        markdown_processor.process_markdown(args.markdown, new_document_id)
+        document_manager.update_document_hash(new_document_id, current_file_hash)

    if args.ls:
        topic_reader = TopicReader(db_manager)
@ -533,90 +665,29 @@ def main():
        db_manager.close()
        return

-    # Check if the markdown file exists
-    if not os.path.exists(markdown_file):
-        print(f"Error: Markdown file '{markdown_file}' not found. Use --bootstrap to create a new calendar.")
-        db_manager.close()
-        return
-
-    document_manager = DocumentManager(db_manager)
-    markdown_processor = MarkdownProcessor(db_manager)
-
-    # Get the base name of the markdown file (without path)
-    document_name = os.path.basename(markdown_file)
-
-    # Check if the document already exists in the database
-    document = db_manager.cursor.execute('SELECT id, file_path, md5sum, updated_timestamp FROM documents WHERE name = ?', (document_name,)).fetchone()
-
-    # Compute the current MD5 hash of the markdown file
-    current_file_hash = compute_file_hash(markdown_file)
-
-    if document:
-        # If the document exists in the database
-        document_id, stored_file_path, stored_md5sum, last_updated = document
-
-        if stored_file_path != markdown_file:
-            print(f"Updating file path for '{document_name}' in the database...")
-            document_manager.update_document(document_id, file_path=markdown_file)
-
-        if stored_md5sum != current_file_hash:
-            # If the file has changed since last update
-            print(f"File '{document_name}' has changed. Updating the database...")
-            document_manager.update_document_hash(document_id, current_file_hash)
-            markdown_processor.process_markdown(markdown_file, document_id)
-        else:
-            # If the file hasn't changed
-            print(f"File '{document_name}' has not changed. Skipping update.")
-    else:
-        # If the document doesn't exist in the database
-        print(f"Document '{document_name}' not found in the database. Adding new entry...")
-        document_id = document_manager.create_document(document_name, markdown_file)
-        document_manager.update_document_hash(document_id, current_file_hash)
-        markdown_processor.process_markdown(markdown_file, document_id)
-
-    # Check if a topic title argument is provided
-    if args.topic_title:
-        # Initialize TopicReader
+    # Topic or UUID-based content retrieval
+    if args.topic_title or args.uuid:
        topic_reader = TopicReader(db_manager)

-        # Retrieve the content for the specified topic
-        result = topic_reader.get_topic_content(args.topic_title)
-
-        if result:
-            # If content is found, write it back to the original markdown file
-            with open(markdown_file, 'w', encoding='utf-8') as file:
-                file.write(result)
-                file.write('\n')
-            print(f"Selected topic and subtopics written to {markdown_file}")
-            
-            # Generate HTML if --html option is specified
-            if args.html:
-                html_file = os.path.splitext(markdown_file)[0] + '.html'
-                html_content = convert_to_html(result)
-                with open(html_file, 'w', encoding='utf-8') as file:
-                    file.write(html_content)
-                print(f"HTML version written to {html_file}")
-
-            # Update the document hash in the database
-            new_file_hash = compute_file_hash(markdown_file)
-            document_manager.update_document_hash(document_id, new_file_hash)
+        if args.uuid:
+            content = topic_reader.build_full_content(args.uuid)
        else:
-            # If no content is found
-            print("No result to write. The original file remains unchanged.")
-    else:
-        print("No topic title provided. The database has been updated/added without modifying the file.")
+            content = topic_reader.get_topic_content(args.topic_title)

-        # Generate HTML for the entire document if --html option is specified
+        if content:
+            # Write the selected content to the markdown file
+            with open(args.markdown, 'w', encoding='utf-8') as file:
+                file.write(content)
+                file.write('\n')
+            print(f"Selected content written to {args.markdown}")
+
+        # Optionally convert to HTML
        if args.html:
-            with open(markdown_file, 'r', encoding='utf-8') as file:
-                markdown_content = file.read()
-            html_file = os.path.splitext(markdown_file)[0] + '.html'
-            html_content = convert_to_html(markdown_content)
+            html_file = f"{args.markdown}.html"
            with open(html_file, 'w', encoding='utf-8') as file:
-                file.write(html_content)
-            print(f"HTML version of the entire document written to {html_file}")
+                file.write(convert_to_html(content))
+            print(f"HTML version written to {html_file}")
    
-    # Close the database connection
    db_manager.close()

 if __name__ == '__main__':