# mpaste/gen_test_upload.py
#
# 48 lines
# 1.5 KiB
# Python

import re
import os
from random import randrange
# Splits on whitespace that follows '.' or '?', except after patterns that look
# like abbreviations ("e.g." style or a capitalised two-letter form like "Mr.").
_SENTENCE_BOUNDARY = re.compile(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s')


def _read_index(index_file):
    """Return the sentence offset stored in index_file, or 0 if none is stored."""
    try:
        with open(index_file, 'r') as file:
            content = file.read().strip()
    except FileNotFoundError:
        return 0
    # An empty file (e.g. truncated by an interrupted write) means "start over";
    # any other non-integer content still raises ValueError.
    return int(content) if content else 0


def _write_index(index_file, index):
    """Persist the offset of the next unread sentence to index_file."""
    with open(index_file, 'w') as file:
        file.write(str(index))


def _load_sentences(file_path):
    """Read file_path and return its non-blank sentences in order."""
    with open(file_path, 'r') as file:
        text = file.read()
    # A trailing newline after the final period produces an empty fragment;
    # drop such fragments so they are never counted or written as sentences.
    return [part for part in _SENTENCE_BOUNDARY.split(text) if part.strip()]


def _write_sentences(sentences, output_path):
    """Write each sentence followed by a single space to output_path."""
    with open(output_path, 'w') as file:
        file.write(''.join(sentence + ' ' for sentence in sentences))


def new_upload(output_file, input_file='../HP1.txt', index_file='../read_index.txt'):
    """
    Extracts 1 to 3 sentences from input_file and writes them to output_file.

    The position of the next unread sentence is kept in index_file, so
    successive calls walk through input_file in order. When the stored
    position is at or past the end of the text (or is negative), reading
    restarts from the first sentence.

    Args:
        output_file: Path of the file to (over)write with the sentences.
        input_file: Path of the text to take sentences from.
        index_file: Path of the file holding the next sentence offset; it is
            created on first use.

    Raises:
        FileNotFoundError: If input_file does not exist.
        ValueError: If index_file contains something other than an integer.
    """
    num_sentences = randrange(1, 4)

    # Read the current index
    start_index = _read_index(index_file)

    all_sentences = _load_sentences(input_file)
    # Wrap around once the text is exhausted instead of emitting empty uploads.
    if not 0 <= start_index < len(all_sentences):
        start_index = 0

    # Read 1 to 3 sentences from the input file
    sentences = all_sentences[start_index:start_index + num_sentences]

    # Write the sentences to the output file
    _write_sentences(sentences, output_file)

    # Write the new index back to the index file. Advance by the number of
    # sentences actually written, which may be fewer than requested near the
    # end of the text.
    _write_index(index_file, start_index + len(sentences))

    print(f"Successfully wrote {len(sentences)} sentence(s) to {output_file}.")