Fix the heading lineage problem. Still investigating why headings appear twice in the output.
This commit is contained in:
parent
5b975c5304
commit
22ff45530d
@ -159,28 +159,27 @@ class MarkdownProcessor:
|
|||||||
|
|
||||||
def store_markdown_content(self, tokens: List, document_id: int) -> None:
|
def store_markdown_content(self, tokens: List, document_id: int) -> None:
|
||||||
"""Store parsed markdown content in the database."""
|
"""Store parsed markdown content in the database."""
|
||||||
parent_stack: List[Optional[int]] = []
|
parent_stack: List[Tuple[int, int]] = [] # (level, heading_id)
|
||||||
|
current_heading_id = None
|
||||||
for token in tokens:
|
for token in tokens:
|
||||||
if token.type == 'heading_open':
|
if token.type == 'heading_open':
|
||||||
level = int(token.tag.strip('h'))
|
level = int(token.tag.strip('h'))
|
||||||
content_token = tokens[tokens.index(token) + 1]
|
content_token = tokens[tokens.index(token) + 1]
|
||||||
title = content_token.content
|
title = content_token.content
|
||||||
|
|
||||||
parent_id = parent_stack[-1] if parent_stack else None
|
# Find the appropriate parent
|
||||||
|
while parent_stack and parent_stack[-1][0] >= level:
|
||||||
heading_id = self.insert_heading(level, title, parent_id, document_id)
|
parent_stack.pop()
|
||||||
|
|
||||||
if not parent_stack or level > len(parent_stack):
|
parent_id = parent_stack[-1][1] if parent_stack else None
|
||||||
parent_stack.append(heading_id)
|
current_heading_id = self.insert_heading(level, title, parent_id, document_id)
|
||||||
else:
|
|
||||||
while parent_stack and level <= len(parent_stack):
|
parent_stack.append((level, current_heading_id))
|
||||||
parent_stack.pop()
|
elif token.type == 'inline' and current_heading_id and token.content.strip():
|
||||||
parent_stack.append(heading_id)
|
# Only insert non-empty content that's not part of a heading
|
||||||
|
if tokens[tokens.index(token) - 1].type != 'heading_open':
|
||||||
elif token.type == 'inline' and parent_stack:
|
self.insert_body(token.content, current_heading_id, document_id)
|
||||||
self.insert_body(token.content, parent_stack[-1], document_id)
|
|
||||||
|
|
||||||
self.db_manager.conn.commit()
|
self.db_manager.conn.commit()
|
||||||
|
|
||||||
def insert_heading(self, level: int, title: str, parent_id: Optional[int], document_id: int) -> int:
|
def insert_heading(self, level: int, title: str, parent_id: Optional[int], document_id: int) -> int:
|
||||||
@ -276,7 +275,8 @@ class TopicReader:
|
|||||||
body_content = '\n'.join([row[0] for row in rows])
|
body_content = '\n'.join([row[0] for row in rows])
|
||||||
|
|
||||||
# Write the heading once and then its body content
|
# Write the heading once and then its body content
|
||||||
result = f"{'#' * level} {title}\n{body_content.strip()}\n"
|
#result = f"\n{'#' * level} {title}\n{body_content.strip()}\n"
|
||||||
|
result = f"{'#' * level} {title}\n\n{body_content.strip()}\n"
|
||||||
|
|
||||||
if include_subtopics:
|
if include_subtopics:
|
||||||
# Fetch all subtopics (e.g., days) that are children of the current heading
|
# Fetch all subtopics (e.g., days) that are children of the current heading
|
||||||
@ -286,7 +286,8 @@ class TopicReader:
|
|||||||
subtopic_content = self.fetch_body_and_subtopics(subtopic_id, include_subtopics=True)
|
subtopic_content = self.fetch_body_and_subtopics(subtopic_id, include_subtopics=True)
|
||||||
result += subtopic_content
|
result += subtopic_content
|
||||||
|
|
||||||
return result.strip() # Strip extra newlines
|
#return result.strip() # Strip extra newlines
|
||||||
|
return result
|
||||||
|
|
||||||
def _fetch_subtopics(self, heading_id: int, parent_level: int) -> List[Tuple[int, int, str]]:
|
def _fetch_subtopics(self, heading_id: int, parent_level: int) -> List[Tuple[int, int, str]]:
|
||||||
"""
|
"""
|
||||||
@ -325,7 +326,7 @@ class TopicReader:
|
|||||||
result += self.fetch_body_and_subtopics(id, include_subtopics=True)
|
result += self.fetch_body_and_subtopics(id, include_subtopics=True)
|
||||||
else:
|
else:
|
||||||
# Include only the heading chain without duplicating content
|
# Include only the heading chain without duplicating content
|
||||||
result += f"{'#' * level} {title}\n\n"
|
result += f"\n{'#' * level} {title}\n\n"
|
||||||
return result.strip() # Ensure there are no trailing newlines
|
return result.strip() # Ensure there are no trailing newlines
|
||||||
print(f"No topic found matching '{input_title}'.")
|
print(f"No topic found matching '{input_title}'.")
|
||||||
return None
|
return None
|
||||||
|
Loading…
Reference in New Issue
Block a user