Fix the heading lineage problem. Still investigating why headings are duplicated in the output.
This commit is contained in:
		@@ -159,27 +159,26 @@ class MarkdownProcessor:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def store_markdown_content(self, tokens: List, document_id: int) -> None:
 | 
					    def store_markdown_content(self, tokens: List, document_id: int) -> None:
 | 
				
			||||||
        """Store parsed markdown content in the database."""
 | 
					        """Store parsed markdown content in the database."""
 | 
				
			||||||
        parent_stack: List[Optional[int]] = []
 | 
					        parent_stack: List[Tuple[int, int]] = []  # (level, heading_id)
 | 
				
			||||||
 | 
					        current_heading_id = None
 | 
				
			||||||
        for token in tokens:
 | 
					        for token in tokens:
 | 
				
			||||||
            if token.type == 'heading_open':
 | 
					            if token.type == 'heading_open':
 | 
				
			||||||
                level = int(token.tag.strip('h'))
 | 
					                level = int(token.tag.strip('h'))
 | 
				
			||||||
                content_token = tokens[tokens.index(token) + 1]
 | 
					                content_token = tokens[tokens.index(token) + 1]
 | 
				
			||||||
                title = content_token.content
 | 
					                title = content_token.content
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
                parent_id = parent_stack[-1] if parent_stack else None
 | 
					                # Find the appropriate parent
 | 
				
			||||||
 | 
					                while parent_stack and parent_stack[-1][0] >= level:
 | 
				
			||||||
 | 
					                    parent_stack.pop()
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
                heading_id = self.insert_heading(level, title, parent_id, document_id)
 | 
					                parent_id = parent_stack[-1][1] if parent_stack else None
 | 
				
			||||||
 | 
					                current_heading_id = self.insert_heading(level, title, parent_id, document_id)
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
                if not parent_stack or level > len(parent_stack):
 | 
					                parent_stack.append((level, current_heading_id))
 | 
				
			||||||
                    parent_stack.append(heading_id)
 | 
					            elif token.type == 'inline' and current_heading_id and token.content.strip():
 | 
				
			||||||
                else:
 | 
					                # Only insert non-empty content that's not part of a heading
 | 
				
			||||||
                    while parent_stack and level <= len(parent_stack):
 | 
					                if tokens[tokens.index(token) - 1].type != 'heading_open':
 | 
				
			||||||
                        parent_stack.pop()
 | 
					                    self.insert_body(token.content, current_heading_id, document_id)
 | 
				
			||||||
                    parent_stack.append(heading_id)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            elif token.type == 'inline' and parent_stack:
 | 
					 | 
				
			||||||
                self.insert_body(token.content, parent_stack[-1], document_id)
 | 
					 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        self.db_manager.conn.commit()
 | 
					        self.db_manager.conn.commit()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -276,7 +275,8 @@ class TopicReader:
 | 
				
			|||||||
        body_content = '\n'.join([row[0] for row in rows])
 | 
					        body_content = '\n'.join([row[0] for row in rows])
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
        # Write the heading once and then its body content
 | 
					        # Write the heading once and then its body content
 | 
				
			||||||
        result = f"{'#' * level} {title}\n{body_content.strip()}\n"
 | 
					        #result = f"\n{'#' * level} {title}\n{body_content.strip()}\n"
 | 
				
			||||||
 | 
					        result = f"{'#' * level} {title}\n\n{body_content.strip()}\n"
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
        if include_subtopics:
 | 
					        if include_subtopics:
 | 
				
			||||||
            # Fetch all subtopics (e.g., days) that are children of the current heading
 | 
					            # Fetch all subtopics (e.g., days) that are children of the current heading
 | 
				
			||||||
@@ -286,7 +286,8 @@ class TopicReader:
 | 
				
			|||||||
                subtopic_content = self.fetch_body_and_subtopics(subtopic_id, include_subtopics=True)
 | 
					                subtopic_content = self.fetch_body_and_subtopics(subtopic_id, include_subtopics=True)
 | 
				
			||||||
                result += subtopic_content
 | 
					                result += subtopic_content
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
        return result.strip()  # Strip extra newlines
 | 
					        #return result.strip()  # Strip extra newlines
 | 
				
			||||||
 | 
					        return result
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    def _fetch_subtopics(self, heading_id: int, parent_level: int) -> List[Tuple[int, int, str]]:
 | 
					    def _fetch_subtopics(self, heading_id: int, parent_level: int) -> List[Tuple[int, int, str]]:
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
@@ -325,7 +326,7 @@ class TopicReader:
 | 
				
			|||||||
                    result += self.fetch_body_and_subtopics(id, include_subtopics=True)
 | 
					                    result += self.fetch_body_and_subtopics(id, include_subtopics=True)
 | 
				
			||||||
                else:
 | 
					                else:
 | 
				
			||||||
                    # Include only the heading chain without duplicating content
 | 
					                    # Include only the heading chain without duplicating content
 | 
				
			||||||
                    result += f"{'#' * level} {title}\n\n"
 | 
					                    result += f"\n{'#' * level} {title}\n\n"
 | 
				
			||||||
            return result.strip()  # Ensure there are no trailing newlines
 | 
					            return result.strip()  # Ensure there are no trailing newlines
 | 
				
			||||||
        print(f"No topic found matching '{input_title}'.")
 | 
					        print(f"No topic found matching '{input_title}'.")
 | 
				
			||||||
        return None
 | 
					        return None
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user