From cf1674735d73699213a6e2088dfb49d284f74ce1 Mon Sep 17 00:00:00 2001 From: kalzu rekku Date: Fri, 4 Oct 2024 09:58:34 +0300 Subject: [PATCH] Small side quest. Added possibility to get the output also in html. --- markdown_sqlite.py | 129 +++++++++++++++++++++++++++++++-------------- 1 file changed, 90 insertions(+), 39 deletions(-) diff --git a/markdown_sqlite.py b/markdown_sqlite.py index 836a172..0329b77 100644 --- a/markdown_sqlite.py +++ b/markdown_sqlite.py @@ -254,13 +254,14 @@ class TopicReader: return result.strip() - def fetch_body_and_subtopics(self, heading_id: int, include_subtopics: bool = True) -> str: + def fetch_body_and_subtopics(self, heading_id: int, include_subtopics: bool = True, level_offset: int = 0) -> str: """ - Fetch body content and subtopics for a given heading. + Fetch body content and subtopics for a given heading with improved Markdown formatting. Args: heading_id (int): ID of the heading to fetch. include_subtopics (bool): Whether to include subtopics in the result. + level_offset (int): Offset to adjust heading levels for proper nesting. Returns: str: Formatted string containing the heading content and subtopics. @@ -268,27 +269,52 @@ class TopicReader: # Fetch the current heading and body content self.db_manager.cursor.execute('SELECT level, title FROM headings WHERE id = ?', (heading_id,)) level, title = self.db_manager.cursor.fetchone() + + # Adjust the level based on the offset + adjusted_level = max(1, level - level_offset) - # Fetch the content for this heading (the days in the calendar) + # Fetch the content for this heading self.db_manager.cursor.execute('SELECT content FROM body WHERE heading_id = ?', (heading_id,)) rows = self.db_manager.cursor.fetchall() body_content = '\n'.join([row[0] for row in rows]) - # Write the heading once and then its body content - #result = f"\n{'#' * level} {title}\n{body_content.strip()}\n" - result = f"{'#' * level} {title}\n\n{body_content.strip()}\n" + # Construct the result with proper spacing + result = f"\n{'#' * adjusted_level} {title}\n\n" + if body_content.strip(): + result += f"{body_content.strip()}\n\n" if include_subtopics: - # Fetch all subtopics (e.g., days) that are children of the current heading - subtopics = self._fetch_subtopics(heading_id, level) - for subtopic_id, subtopic_level, subtopic_title in subtopics: + # Fetch all subtopics that are children of the current heading + subtopics = self._fetch_subtopics(heading_id, adjusted_level) + for subtopic_id, _, _ in subtopics: # Recursively fetch subtopic content - subtopic_content = self.fetch_body_and_subtopics(subtopic_id, include_subtopics=True) + subtopic_content = self.fetch_body_and_subtopics(subtopic_id, include_subtopics=True, level_offset=level_offset) result += subtopic_content - #return result.strip() # Strip extra newlines - return result + return result.strip() + "\n" # Ensure there's a newline at the end of each section + + def get_topic_content(self, input_title: str) -> Optional[str]: + """ + Get the content of a topic based on the input title, including its topic chain and subtopics. + Returns: + str or None: Formatted string containing the topic chain, content, and subtopics, or None if not found. + """ + heading_id = self.find_closest_heading(input_title) + if heading_id: + topic_chain = self.fetch_topic_chain(heading_id) + result = "" + for i, (id, title, level) in enumerate(topic_chain): + if id == heading_id: + # Fetch the full content for the selected topic and its subtopics + result += self.fetch_body_and_subtopics(id, include_subtopics=True, level_offset=i) + else: + # Include only the heading chain without duplicating content + result += f"\n{'#' * (level - i)} {title}\n\n" + return result.strip() + "\n" # Ensure there's a final newline + print(f"No topic found matching '{input_title}'.") + return None + def _fetch_subtopics(self, heading_id: int, parent_level: int) -> List[Tuple[int, int, str]]: """ Fetch all subtopics that are children of the given heading. @@ -307,30 +333,6 @@ class TopicReader: ORDER BY level, id ''', (heading_id,)) return self.db_manager.cursor.fetchall() - - - def get_topic_content(self, input_title: str) -> Optional[str]: - """ - Get the content of a topic based on the input title, including its topic chain and subtopics. - - Returns: - str or None: Formatted string containing the topic chain, content, and subtopics, or None if not found. - """ - heading_id = self.find_closest_heading(input_title) - if heading_id: - topic_chain = self.fetch_topic_chain(heading_id) - result = "" - for id, title, level in topic_chain: - if id == heading_id: - # Fetch the full content for the selected topic and its subtopics - result += self.fetch_body_and_subtopics(id, include_subtopics=True) - else: - # Include only the heading chain without duplicating content - result += f"\n{'#' * level} {title}\n\n" - return result.strip() # Ensure there are no trailing newlines - print(f"No topic found matching '{input_title}'.") - return None - def find_closest_heading(self, input_title: str) -> Optional[int]: """ @@ -357,8 +359,6 @@ class TopicReader: return None - - def compute_file_hash(file_path: str) -> str: """ Compute the MD5 hash of a file. @@ -369,7 +369,6 @@ def compute_file_hash(file_path: str) -> str: hash_md5.update(chunk) return hash_md5.hexdigest() - def generate_calendar(year: int) -> str: """Generate a markdown calendar for the specified year.""" calendar_markdown = f"# {year}\n\n" @@ -396,6 +395,38 @@ def generate_calendar(year: int) -> str: calendar_markdown += "\n" # Add a newline after each month return calendar_markdown + +def convert_to_html(markdown_content: str) -> str: + """ + Convert Markdown content to HTML. + """ + md = MarkdownIt() + html_content = md.render(markdown_content) + + # Wrap the content in a basic HTML structure + html_document = f""" + + + + + + Calendar + + + + {html_content} + + + """ + + return html_document + def bootstrap_calendar(year: int, db_manager: DatabaseManager, markdown_file: str): """Generate and store a full year's markdown calendar in the database.""" @@ -435,6 +466,7 @@ def main(): parser.add_argument('topic_title', nargs='?', type=str, help='Topic title to select (fuzzy matching enabled)') parser.add_argument('--bootstrap', action='store_true', help='Generate markdown calendar for the current year and load it to the database.') parser.add_argument('--ls', action='store_true', help='List all available headings.') + parser.add_argument('--html', action='store_true', help='Generate an HTML version of the output') args = parser.parse_args() # Use the provided or default file paths @@ -508,7 +540,16 @@ def main(): # If content is found, write it back to the original markdown file with open(markdown_file, 'w', encoding='utf-8') as file: file.write(result) + file.write('\n') print(f"Selected topic and subtopics written to {markdown_file}") + + # Generate HTML if --html option is specified + if args.html: + html_file = os.path.splitext(markdown_file)[0] + '.html' + html_content = convert_to_html(result) + with open(html_file, 'w', encoding='utf-8') as file: + file.write(html_content) + print(f"HTML version written to {html_file}") # Update the document hash in the database new_file_hash = compute_file_hash(markdown_file) @@ -519,6 +560,16 @@ def main(): else: print("No topic title provided. The database has been updated/added without modifying the file.") + # Generate HTML for the entire document if --html option is specified + if args.html: + with open(markdown_file, 'r', encoding='utf-8') as file: + markdown_content = file.read() + html_file = os.path.splitext(markdown_file)[0] + '.html' + html_content = convert_to_html(markdown_content) + with open(html_file, 'w', encoding='utf-8') as file: + file.write(html_content) + print(f"HTML version of the entire document written to {html_file}") + # Close the database connection db_manager.close()