Still broken. Trying to make the update procedure make sense.
This commit is contained in:
		@@ -13,7 +13,7 @@ import hashlib
 | 
				
			|||||||
import argparse
 | 
					import argparse
 | 
				
			||||||
import logging
 | 
					import logging
 | 
				
			||||||
from datetime import datetime
 | 
					from datetime import datetime
 | 
				
			||||||
from typing import List, Tuple, Optional
 | 
					from typing import List, Tuple, Dict, Set, Optional
 | 
				
			||||||
from markdown_it import MarkdownIt
 | 
					from markdown_it import MarkdownIt
 | 
				
			||||||
from thefuzz import fuzz, process
 | 
					from thefuzz import fuzz, process
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -137,108 +137,188 @@ class DocumentManager:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class MarkdownProcessor:
 | 
					class MarkdownProcessor:
 | 
				
			||||||
    """Processes markdown files and stores content in the database."""
 | 
					    """Processes markdown files and stores content in the database."""
 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __init__(self, db_manager: 'DatabaseManager') -> None:
 | 
					    def __init__(self, db_manager: 'DatabaseManager') -> None:
 | 
				
			||||||
        """Initialize the MarkdownProcessor."""
 | 
					 | 
				
			||||||
        self.db_manager = db_manager
 | 
					        self.db_manager = db_manager
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def process_markdown(self, markdown_file: str, document_id: int) -> None:
 | 
					    def process_markdown(self, markdown_file: str, document_id: int) -> None:
 | 
				
			||||||
        """Process a markdown file and store its content in the database."""
 | 
					 | 
				
			||||||
        markdown_text = self.read_markdown_file(markdown_file)
 | 
					        markdown_text = self.read_markdown_file(markdown_file)
 | 
				
			||||||
        md = MarkdownIt()
 | 
					        md = MarkdownIt()
 | 
				
			||||||
        tokens = md.parse(markdown_text)
 | 
					        tokens = md.parse(markdown_text)
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        self.clear_document_content(document_id)
 | 
					        self.update_document_content(tokens, document_id)
 | 
				
			||||||
        self.store_markdown_content(tokens, document_id)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def read_markdown_file(self, file_path: str) -> str:
 | 
					    def read_markdown_file(self, file_path: str) -> str:
 | 
				
			||||||
        """Read content from a markdown file."""
 | 
					 | 
				
			||||||
        with open(file_path, 'r', encoding='utf-8') as file:
 | 
					        with open(file_path, 'r', encoding='utf-8') as file:
 | 
				
			||||||
            return file.read()
 | 
					            return file.read()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def clear_document_content(self, document_id: int) -> None:
 | 
					    def update_document_content(self, tokens: List, document_id: int) -> None:
 | 
				
			||||||
        """Clear existing content for a document in the database."""
 | 
					        existing_structure = self.get_existing_document_structure(document_id)
 | 
				
			||||||
        logging.debug(f"!! DELETING FROM DATABASE, document_id: {document_id}")
 | 
					        new_structure = self.parse_new_structure(tokens)
 | 
				
			||||||
        self.db_manager.cursor.execute('DELETE FROM headings WHERE document_id = ?', (document_id,))
 | 
					        
 | 
				
			||||||
        self.db_manager.cursor.execute('DELETE FROM body WHERE document_id = ?', (document_id,))
 | 
					        self.merge_structures(existing_structure, new_structure, document_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def get_existing_document_structure(self, document_id: int) -> Dict:
 | 
				
			||||||
 | 
					        structure = {}
 | 
				
			||||||
 | 
					        self.db_manager.cursor.execute('''
 | 
				
			||||||
 | 
					            SELECT h.id, h.level, h.title, h.parent_id, b.content
 | 
				
			||||||
 | 
					            FROM headings h
 | 
				
			||||||
 | 
					            LEFT JOIN body b ON h.id = b.heading_id
 | 
				
			||||||
 | 
					            WHERE h.document_id = ? AND h.isDeleted = 0
 | 
				
			||||||
 | 
					            ORDER BY h.level, h.id
 | 
				
			||||||
 | 
					        ''', (document_id,))
 | 
				
			||||||
 | 
					        for heading_id, level, title, parent_id, content in self.db_manager.cursor.fetchall():
 | 
				
			||||||
 | 
					            structure[heading_id] = {
 | 
				
			||||||
 | 
					                'level': level,
 | 
				
			||||||
 | 
					                'title': title,
 | 
				
			||||||
 | 
					                'parent_id': parent_id,
 | 
				
			||||||
 | 
					                'content': content,
 | 
				
			||||||
 | 
					                'children': []
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        # Build the tree structure
 | 
				
			||||||
 | 
					        root = {}
 | 
				
			||||||
 | 
					        for id, node in structure.items():
 | 
				
			||||||
 | 
					            if node['parent_id'] in structure:
 | 
				
			||||||
 | 
					                structure[node['parent_id']]['children'].append(id)
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                root[id] = node
 | 
				
			||||||
 | 
					        return root
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def parse_new_structure(self, tokens: List) -> Dict:
 | 
				
			||||||
 | 
					        structure = {}
 | 
				
			||||||
 | 
					        current_heading = None
 | 
				
			||||||
 | 
					        current_content = []
 | 
				
			||||||
 | 
					        parent_stack = [{"id": None, "level": 0}]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def store_markdown_content(self, tokens: List, document_id: int) -> None:
 | 
					 | 
				
			||||||
        """Store parsed markdown content in the database."""
 | 
					 | 
				
			||||||
        parent_stack: List[Tuple[int, int]] = []  # (level, heading_id)
 | 
					 | 
				
			||||||
        current_heading_id = None
 | 
					 | 
				
			||||||
        for token in tokens:
 | 
					        for token in tokens:
 | 
				
			||||||
            content_preview = ' '.join(token.content.split()[:10]) + '...' \
 | 
					 | 
				
			||||||
                if len(token.content.split()) > 10 else token.content
 | 
					 | 
				
			||||||
            
 | 
					 | 
				
			||||||
            #logging.debug(f"Processing token: {token.type}, content: {content_preview}")
 | 
					 | 
				
			||||||
            if token.type == 'heading_open':
 | 
					            if token.type == 'heading_open':
 | 
				
			||||||
 | 
					                if current_heading:
 | 
				
			||||||
 | 
					                    structure[current_heading]['content'] = ''.join(current_content).strip()
 | 
				
			||||||
                level = int(token.tag.strip('h'))
 | 
					                level = int(token.tag.strip('h'))
 | 
				
			||||||
                content_token = tokens[tokens.index(token) + 1]
 | 
					                while parent_stack[-1]['level'] >= level:
 | 
				
			||||||
                title = content_token.content
 | 
					 | 
				
			||||||
                
 | 
					 | 
				
			||||||
                # Find the appropriate parent
 | 
					 | 
				
			||||||
                while parent_stack and parent_stack[-1][0] >= level:
 | 
					 | 
				
			||||||
                    parent_stack.pop()
 | 
					                    parent_stack.pop()
 | 
				
			||||||
 | 
					                current_heading = str(uuid.uuid4())  # Generate a temporary ID
 | 
				
			||||||
 | 
					                structure[current_heading] = {
 | 
				
			||||||
 | 
					                    'level': level,
 | 
				
			||||||
 | 
					                    'title': '',
 | 
				
			||||||
 | 
					                    'parent_id': parent_stack[-1]['id'],
 | 
				
			||||||
 | 
					                    'content': '',
 | 
				
			||||||
 | 
					                    'children': []
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                parent_stack.append({"id": current_heading, "level": level})
 | 
				
			||||||
 | 
					                current_content = []
 | 
				
			||||||
 | 
					            elif token.type == 'heading_close':
 | 
				
			||||||
 | 
					                structure[current_heading]['content'] = ''.join(current_content).strip()
 | 
				
			||||||
 | 
					            elif token.type == 'inline' and current_heading:
 | 
				
			||||||
 | 
					                if structure[current_heading]['title'] == '':
 | 
				
			||||||
 | 
					                    structure[current_heading]['title'] = token.content
 | 
				
			||||||
 | 
					                else:
 | 
				
			||||||
 | 
					                    current_content.append(token.content)
 | 
				
			||||||
 | 
					            elif current_heading:
 | 
				
			||||||
 | 
					                current_content.append(token.content)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                parent_id = parent_stack[-1][1] if parent_stack else None
 | 
					        if current_heading:
 | 
				
			||||||
                current_heading_id = self.insert_heading(level, title, parent_id, document_id)
 | 
					            structure[current_heading]['content'] = ''.join(current_content).strip()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                parent_stack.append((level, current_heading_id))
 | 
					        return structure
 | 
				
			||||||
            elif token.type == 'inline' and current_heading_id and token.content.strip():
 | 
					 | 
				
			||||||
                # Only insert non-empty content that's not part of a heading
 | 
					 | 
				
			||||||
                if tokens[tokens.index(token) - 1].type != 'heading_open':
 | 
					 | 
				
			||||||
                    self.insert_body(token.content, current_heading_id, document_id)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.db_manager.conn.commit()
 | 
					    def merge_structures(self, existing: Dict, new: Dict, document_id: int) -> None:
 | 
				
			||||||
 | 
					        def merge_recursive(existing_node, new_node, parent_id):
 | 
				
			||||||
 | 
					            if not existing_node:
 | 
				
			||||||
 | 
					                # This is a new node, insert it
 | 
				
			||||||
 | 
					                heading_id = self.insert_heading(new_node['level'], new_node['title'], parent_id, document_id)
 | 
				
			||||||
 | 
					                self.insert_body(new_node['content'], heading_id, document_id)
 | 
				
			||||||
 | 
					                for child in new_node['children']:
 | 
				
			||||||
 | 
					                    merge_recursive(None, new[child], heading_id)
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                # Update existing node
 | 
				
			||||||
 | 
					                self.update_heading(existing_node['id'], new_node['title'], new_node['level'], parent_id)
 | 
				
			||||||
 | 
					                self.update_body(existing_node['id'], new_node['content'], document_id)
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                # Process children
 | 
				
			||||||
 | 
					                existing_children = {child['title']: child for child in existing_node['children']}
 | 
				
			||||||
 | 
					                new_children = {child['title']: child for child in new_node['children']}
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                for title, child in new_children.items():
 | 
				
			||||||
 | 
					                    if title in existing_children:
 | 
				
			||||||
 | 
					                        merge_recursive(existing_children[title], child, existing_node['id'])
 | 
				
			||||||
 | 
					                    else:
 | 
				
			||||||
 | 
					                        merge_recursive(None, child, existing_node['id'])
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                for title, child in existing_children.items():
 | 
				
			||||||
 | 
					                    if title not in new_children:
 | 
				
			||||||
 | 
					                        self.soft_delete_heading(child['id'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for new_root in new.values():
 | 
				
			||||||
 | 
					            existing_root = next((node for node in existing.values() if node['title'] == new_root['title']), None)
 | 
				
			||||||
 | 
					            merge_recursive(existing_root, new_root, None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def insert_heading(self, level: int, title: str, parent_id: Optional[int], document_id: int) -> int:
 | 
					    def insert_heading(self, level: int, title: str, parent_id: Optional[int], document_id: int) -> int:
 | 
				
			||||||
        """Insert a heading into the database."""
 | 
					 | 
				
			||||||
        logging.debug(f"Inserting title: {title} level: {level}")
 | 
					 | 
				
			||||||
        self.db_manager.cursor.execute('''
 | 
					        self.db_manager.cursor.execute('''
 | 
				
			||||||
            INSERT INTO headings (level, title, parent_id, document_id)
 | 
					            INSERT INTO headings (level, title, parent_id, document_id)
 | 
				
			||||||
            VALUES (?, ?, ?, ?)
 | 
					            VALUES (?, ?, ?, ?)
 | 
				
			||||||
        ''', (level, title, parent_id, document_id))
 | 
					        ''', (level, title, parent_id, document_id))
 | 
				
			||||||
        return self.db_manager.cursor.lastrowid
 | 
					        return self.db_manager.cursor.lastrowid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def update_heading(self, heading_id: int, title: str, level: int, parent_id: Optional[int]) -> None:
 | 
				
			||||||
 | 
					        self.db_manager.cursor.execute('''
 | 
				
			||||||
 | 
					            UPDATE headings
 | 
				
			||||||
 | 
					            SET title = ?, level = ?, parent_id = ?, updated_timestamp = CURRENT_TIMESTAMP
 | 
				
			||||||
 | 
					            WHERE id = ?
 | 
				
			||||||
 | 
					        ''', (title, level, parent_id, heading_id))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def insert_body(self, content: str, heading_id: int, document_id: int) -> None:
 | 
					    def insert_body(self, content: str, heading_id: int, document_id: int) -> None:
 | 
				
			||||||
        """Insert body content into the database with checksumming."""
 | 
					 | 
				
			||||||
        md5sum = hashlib.md5(content.encode()).hexdigest()
 | 
					        md5sum = hashlib.md5(content.encode()).hexdigest()
 | 
				
			||||||
        self.db_manager.cursor.execute('''
 | 
					        self.db_manager.cursor.execute('''
 | 
				
			||||||
            INSERT INTO body (content, heading_id, document_id, md5sum)
 | 
					            INSERT INTO body (content, heading_id, document_id, md5sum)
 | 
				
			||||||
            VALUES (?, ?, ?, ?)
 | 
					            VALUES (?, ?, ?, ?)
 | 
				
			||||||
        ''', (content, heading_id, document_id, md5sum))
 | 
					        ''', (content, heading_id, document_id, md5sum))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def update_body(self, heading_id: int, content: str, document_id: int) -> None:
 | 
				
			||||||
 | 
					        md5sum = hashlib.md5(content.encode()).hexdigest()
 | 
				
			||||||
 | 
					        self.db_manager.cursor.execute('''
 | 
				
			||||||
 | 
					            UPDATE body
 | 
				
			||||||
 | 
					            SET content = ?, md5sum = ?, updated_timestamp = CURRENT_TIMESTAMP
 | 
				
			||||||
 | 
					            WHERE heading_id = ? AND document_id = ?
 | 
				
			||||||
 | 
					        ''', (content, md5sum, heading_id, document_id))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def soft_delete_heading(self, heading_id: int) -> None:
 | 
				
			||||||
 | 
					        now = datetime.now().isoformat()
 | 
				
			||||||
 | 
					        self.db_manager.cursor.execute('''
 | 
				
			||||||
 | 
					            UPDATE headings
 | 
				
			||||||
 | 
					            SET isDeleted = 1, deleted_timestamp = ?
 | 
				
			||||||
 | 
					            WHERE id = ?
 | 
				
			||||||
 | 
					        ''', (now, heading_id))
 | 
				
			||||||
 | 
					        # Also soft delete associated body content
 | 
				
			||||||
 | 
					        self.db_manager.cursor.execute('''
 | 
				
			||||||
 | 
					            UPDATE body
 | 
				
			||||||
 | 
					            SET isDeleted = 1, deleted_timestamp = ?
 | 
				
			||||||
 | 
					            WHERE heading_id = ?
 | 
				
			||||||
 | 
					        ''', (now, heading_id))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TopicReader:
 | 
					class TopicReader:
 | 
				
			||||||
    """Reads and retrieves topics from the database."""
 | 
					    """Reads and retrieves topics from the database."""
 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __init__(self, db_manager: 'DatabaseManager'):
 | 
					    def __init__(self, db_manager: 'DatabaseManager'):
 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        Initialize the TopicReader.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        Args:
 | 
					 | 
				
			||||||
            db_manager (DatabaseManager): An instance of DatabaseManager.
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        self.db_manager = db_manager
 | 
					        self.db_manager = db_manager
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def fetch_headings(self) -> List[Tuple[int, str, int]]:
 | 
					    def fetch_headings(self) -> List[Tuple[int, str, int, Optional[int]]]:
 | 
				
			||||||
        """
 | 
					        self.db_manager.cursor.execute('''
 | 
				
			||||||
        Fetch all non-deleted headings from the database.
 | 
					            SELECT id, title, level, parent_id 
 | 
				
			||||||
        """
 | 
					            FROM headings 
 | 
				
			||||||
        self.db_manager.cursor.execute('SELECT id, title, level FROM headings WHERE isDeleted = 0 ORDER BY level, id')
 | 
					            WHERE isDeleted = 0 
 | 
				
			||||||
 | 
					            ORDER BY level, id
 | 
				
			||||||
 | 
					        ''')
 | 
				
			||||||
        return self.db_manager.cursor.fetchall()
 | 
					        return self.db_manager.cursor.fetchall()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def fetch_topic_chain(self, heading_id: int) -> List[Tuple[int, str, int]]:
 | 
					    def fetch_topic_chain(self, heading_id: int) -> List[Tuple[int, str, int]]:
 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        Fetch the topic chain (hierarchy of parent topics) for a given heading.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        Returns:
 | 
					 | 
				
			||||||
            List[Tuple[int, str, int]]: List of (id, title, level) tuples representing the topic chain.
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        chain = []
 | 
					        chain = []
 | 
				
			||||||
        current_id = heading_id
 | 
					        current_id = heading_id
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        while current_id is not None:
 | 
					        while current_id is not None:
 | 
				
			||||||
            self.db_manager.cursor.execute('SELECT id, title, level, parent_id FROM headings WHERE id = ?', (current_id,))
 | 
					            self.db_manager.cursor.execute('''
 | 
				
			||||||
 | 
					                SELECT id, title, level, parent_id 
 | 
				
			||||||
 | 
					                FROM headings 
 | 
				
			||||||
 | 
					                WHERE id = ?
 | 
				
			||||||
 | 
					            ''', (current_id,))
 | 
				
			||||||
            result = self.db_manager.cursor.fetchone()
 | 
					            result = self.db_manager.cursor.fetchone()
 | 
				
			||||||
            if result:
 | 
					            if result:
 | 
				
			||||||
                chain.append((result[0], result[1], result[2]))
 | 
					                chain.append((result[0], result[1], result[2]))
 | 
				
			||||||
@@ -249,117 +329,72 @@ class TopicReader:
 | 
				
			|||||||
        return list(reversed(chain))
 | 
					        return list(reversed(chain))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def list_headings(self) -> str:
 | 
					    def list_headings(self) -> str:
 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        List all available headings in a hierarchical structure.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        Returns:
 | 
					 | 
				
			||||||
            str: A formatted string containing all headings.
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        headings = self.fetch_headings()
 | 
					        headings = self.fetch_headings()
 | 
				
			||||||
        result = "Available headings:\n"
 | 
					        result = "Available headings:\n"
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        for _, title, level in headings:
 | 
					        def build_tree(parent_id, level):
 | 
				
			||||||
            indent = "  " * (level - 1)
 | 
					            tree = ""
 | 
				
			||||||
            result += f"{indent}- {title}\n"
 | 
					            for id, title, hlevel, parent in headings:
 | 
				
			||||||
 | 
					                if parent == parent_id:
 | 
				
			||||||
 | 
					                    indent = "  " * (hlevel - 1)
 | 
				
			||||||
 | 
					                    tree += f"{indent}- {title}\n"
 | 
				
			||||||
 | 
					                    tree += build_tree(id, hlevel + 1)
 | 
				
			||||||
 | 
					            return tree
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
 | 
					        result += build_tree(None, 1)
 | 
				
			||||||
        return result.strip()
 | 
					        return result.strip()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def fetch_body_and_subtopics(self, heading_id: int, include_subtopics: bool = True, level_offset: int = 0) -> str:
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        Fetch body content and subtopics for a given heading with improved Markdown formatting.
 | 
					 | 
				
			||||||
        
 | 
					 | 
				
			||||||
        Args:
 | 
					 | 
				
			||||||
            heading_id (int): ID of the heading to fetch.
 | 
					 | 
				
			||||||
            include_subtopics (bool): Whether to include subtopics in the result.
 | 
					 | 
				
			||||||
            level_offset (int): Offset to adjust heading levels for proper nesting.
 | 
					 | 
				
			||||||
            
 | 
					 | 
				
			||||||
        Returns:
 | 
					 | 
				
			||||||
            str: Formatted string containing the heading content and subtopics.
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        # Fetch the current heading and body content
 | 
					 | 
				
			||||||
        self.db_manager.cursor.execute('SELECT level, title FROM headings WHERE id = ?', (heading_id,))
 | 
					 | 
				
			||||||
        level, title = self.db_manager.cursor.fetchone()
 | 
					 | 
				
			||||||
        
 | 
					 | 
				
			||||||
        # Adjust the level based on the offset
 | 
					 | 
				
			||||||
        adjusted_level = max(1, level - level_offset)
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
        # Fetch the content for this heading
 | 
					 | 
				
			||||||
        self.db_manager.cursor.execute('SELECT content FROM body WHERE heading_id = ?', (heading_id,))
 | 
					 | 
				
			||||||
        rows = self.db_manager.cursor.fetchall()
 | 
					 | 
				
			||||||
        body_content = '\n'.join([row[0] for row in rows])
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
        # Construct the result with proper spacing
 | 
					 | 
				
			||||||
        result = f"\n{'#' * adjusted_level} {title}\n\n"
 | 
					 | 
				
			||||||
        if body_content.strip():
 | 
					 | 
				
			||||||
            result += f"{body_content.strip()}\n\n"
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
        if include_subtopics:
 | 
					 | 
				
			||||||
            # Fetch all subtopics that are children of the current heading
 | 
					 | 
				
			||||||
            subtopics = self._fetch_subtopics(heading_id, adjusted_level)
 | 
					 | 
				
			||||||
            for subtopic_id, _, _ in subtopics:
 | 
					 | 
				
			||||||
                # Recursively fetch subtopic content
 | 
					 | 
				
			||||||
                subtopic_content = self.fetch_body_and_subtopics(subtopic_id, include_subtopics=True, level_offset=level_offset)
 | 
					 | 
				
			||||||
                result += subtopic_content
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
        return result.strip() + "\n"  # Ensure there's a newline at the end of each section
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def get_topic_content(self, input_title: str) -> Optional[str]:
 | 
					    def get_topic_content(self, input_title: str) -> Optional[str]:
 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        Get the content of a topic based on the input title, including its topic chain and subtopics.
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
        Returns:
 | 
					 | 
				
			||||||
            str or None: Formatted string containing the topic chain, content, and subtopics, or None if not found.
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        heading_id = self.find_closest_heading(input_title)
 | 
					        heading_id = self.find_closest_heading(input_title)
 | 
				
			||||||
        if heading_id:
 | 
					        if heading_id:
 | 
				
			||||||
            topic_chain = self.fetch_topic_chain(heading_id)
 | 
					            topic_chain = self.fetch_topic_chain(heading_id)
 | 
				
			||||||
            result = ""
 | 
					            result = self.build_full_content(topic_chain[-1][0])
 | 
				
			||||||
            for i, (id, title, level) in enumerate(topic_chain):
 | 
					            return result
 | 
				
			||||||
                if id == heading_id:
 | 
					 | 
				
			||||||
                    # Fetch the full content for the selected topic and its subtopics
 | 
					 | 
				
			||||||
                    result += self.fetch_body_and_subtopics(id, include_subtopics=True, level_offset=i)
 | 
					 | 
				
			||||||
                else:
 | 
					 | 
				
			||||||
                    # Include only the heading chain without duplicating content
 | 
					 | 
				
			||||||
                    result += f"\n{'#' * (level - i)} {title}\n\n"
 | 
					 | 
				
			||||||
            return result.strip() + "\n"  # Ensure there's a final newline
 | 
					 | 
				
			||||||
        print(f"No topic found matching '{input_title}'.")
 | 
					 | 
				
			||||||
        return None
 | 
					        return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _fetch_subtopics(self, heading_id: int, parent_level: int) -> List[Tuple[int, int, str]]:
 | 
					    def build_full_content(self, heading_id: int, level_offset: int = 0) -> str:
 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        Fetch all subtopics that are children of the given heading.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        Returns:
 | 
					 | 
				
			||||||
            List of tuples containing the subtopic's ID, level, and title.
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        self.db_manager.cursor.execute('''
 | 
					        self.db_manager.cursor.execute('''
 | 
				
			||||||
            SELECT id, level, title 
 | 
					            SELECT h.level, h.title, b.content
 | 
				
			||||||
            FROM headings 
 | 
					            FROM headings h
 | 
				
			||||||
 | 
					            LEFT JOIN body b ON h.id = b.heading_id
 | 
				
			||||||
 | 
					            WHERE h.id = ? AND h.isDeleted = 0
 | 
				
			||||||
 | 
					        ''', (heading_id,))
 | 
				
			||||||
 | 
					        heading = self.db_manager.cursor.fetchone()
 | 
				
			||||||
 | 
					        if not heading:
 | 
				
			||||||
 | 
					            return ""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        level, title, content = heading
 | 
				
			||||||
 | 
					        adjusted_level = max(1, level - level_offset)
 | 
				
			||||||
 | 
					        result = f"{'#' * adjusted_level} {title}\n\n"
 | 
				
			||||||
 | 
					        if content:
 | 
				
			||||||
 | 
					            result += f"{content.strip()}\n\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Fetch and process all child headings
 | 
				
			||||||
 | 
					        self.db_manager.cursor.execute('''
 | 
				
			||||||
 | 
					            SELECT id FROM headings
 | 
				
			||||||
            WHERE parent_id = ? AND isDeleted = 0
 | 
					            WHERE parent_id = ? AND isDeleted = 0
 | 
				
			||||||
            ORDER BY level, id
 | 
					            ORDER BY level, id
 | 
				
			||||||
        ''', (heading_id,))
 | 
					        ''', (heading_id,))
 | 
				
			||||||
        return self.db_manager.cursor.fetchall()
 | 
					        children = self.db_manager.cursor.fetchall()
 | 
				
			||||||
 | 
					        for child in children:
 | 
				
			||||||
 | 
					            result += self.build_full_content(child[0], level_offset)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return result
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def find_closest_heading(self, input_title: str) -> Optional[int]:
 | 
					    def find_closest_heading(self, input_title: str) -> Optional[int]:
 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        Find the closest matching heading to the input title using fuzzy matching.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        Returns:
 | 
					 | 
				
			||||||
            int or None: ID of the closest matching heading, or None if no match found.
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        headings = self.fetch_headings()
 | 
					        headings = self.fetch_headings()
 | 
				
			||||||
        if not headings:
 | 
					        if not headings:
 | 
				
			||||||
            print("No topics found in the database.")
 | 
					            print("No topics found in the database.")
 | 
				
			||||||
            return None
 | 
					            return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        heading_titles = [title for _, title, _ in headings]
 | 
					        heading_titles = [title for _, title, _, _ in headings]
 | 
				
			||||||
        closest_match, confidence = process.extractOne(input_title, heading_titles, scorer=fuzz.token_sort_ratio)
 | 
					        closest_match, confidence = process.extractOne(input_title, heading_titles, scorer=fuzz.token_sort_ratio)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if confidence < 50:
 | 
					        if confidence < 50:
 | 
				
			||||||
            print(f"No close matches found for '{input_title}' (Confidence: {confidence})")
 | 
					            print(f"No close matches found for '{input_title}' (Confidence: {confidence})")
 | 
				
			||||||
            return None
 | 
					            return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for heading_id, title, level in headings:
 | 
					        for heading_id, title, _, _ in headings:
 | 
				
			||||||
            if title == closest_match:
 | 
					            if title == closest_match:
 | 
				
			||||||
                return heading_id
 | 
					                return heading_id
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user