diff --git a/markdown_sqlite.py b/markdown_sqlite.py index 57f2c8f..836a172 100644 --- a/markdown_sqlite.py +++ b/markdown_sqlite.py @@ -159,28 +159,27 @@ class MarkdownProcessor: def store_markdown_content(self, tokens: List, document_id: int) -> None: """Store parsed markdown content in the database.""" - parent_stack: List[Optional[int]] = [] - + parent_stack: List[Tuple[int, int]] = [] # (level, heading_id) + current_heading_id = None for token in tokens: if token.type == 'heading_open': level = int(token.tag.strip('h')) content_token = tokens[tokens.index(token) + 1] title = content_token.content - - parent_id = parent_stack[-1] if parent_stack else None - - heading_id = self.insert_heading(level, title, parent_id, document_id) - - if not parent_stack or level > len(parent_stack): - parent_stack.append(heading_id) - else: - while parent_stack and level <= len(parent_stack): - parent_stack.pop() - parent_stack.append(heading_id) - - elif token.type == 'inline' and parent_stack: - self.insert_body(token.content, parent_stack[-1], document_id) - + + # Find the appropriate parent + while parent_stack and parent_stack[-1][0] >= level: + parent_stack.pop() + + parent_id = parent_stack[-1][1] if parent_stack else None + current_heading_id = self.insert_heading(level, title, parent_id, document_id) + + parent_stack.append((level, current_heading_id)) + elif token.type == 'inline' and current_heading_id and token.content.strip(): + # Only insert non-empty content that's not part of a heading + if tokens[tokens.index(token) - 1].type != 'heading_open': + self.insert_body(token.content, current_heading_id, document_id) + self.db_manager.conn.commit() def insert_heading(self, level: int, title: str, parent_id: Optional[int], document_id: int) -> int: @@ -276,7 +275,8 @@ class TopicReader: body_content = '\n'.join([row[0] for row in rows]) # Write the heading once and then its body content - result = f"{'#' * level} {title}\n{body_content.strip()}\n" + #result = f"\n{'#' * level} {title}\n{body_content.strip()}\n" + result = f"{'#' * level} {title}\n\n{body_content.strip()}\n" if include_subtopics: # Fetch all subtopics (e.g., days) that are children of the current heading @@ -286,7 +286,8 @@ class TopicReader: subtopic_content = self.fetch_body_and_subtopics(subtopic_id, include_subtopics=True) result += subtopic_content - return result.strip() # Strip extra newlines + #return result.strip() # Strip extra newlines + return result def _fetch_subtopics(self, heading_id: int, parent_level: int) -> List[Tuple[int, int, str]]: """ @@ -325,7 +326,7 @@ class TopicReader: result += self.fetch_body_and_subtopics(id, include_subtopics=True) else: # Include only the heading chain without duplicating content - result += f"{'#' * level} {title}\n\n" + result += f"\n{'#' * level} {title}\n\n" return result.strip() # Ensure there are no trailing newlines print(f"No topic found matching '{input_title}'.") return None