1 回答

TA貢獻1813條經驗 獲得超2個贊
import glob
import os
class Collector:
    """Iterate over the lines of a text file that pass a marker filter.

    A line is yielded when it contains at least one of the start markers in
    ``self.m1s`` and does not contain the stop marker ``self.m2``. The
    object is both a context manager (closing the file on exit) and its own
    iterator.
    """

    def __init__(self, filename, start_marker, stop_marker):
        """Open *filename* for reading and set up the marker values.

        NOTE(review): ``start_marker`` and ``stop_marker`` are accepted but
        not used; the hard-coded values below drive the filtering. Confirm
        with callers before wiring the parameters in, since that would
        change which lines match.
        """
        # Never read inside this class; kept so the attribute set is unchanged.
        self.toggle_collect = False
        # Undecodable bytes are dropped rather than raising.
        self.f = open(filename, encoding='utf-8', errors='ignore')
        self.m1s = ['|1234|', 'other_word1', 'other_word2']
        self.m2 = 'abcd|'

    def __enter__(self):
        """Return the collector itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the underlying file; exceptions are not suppressed."""
        self.f.close()

    def __iter__(self):
        """Return self; the collector is its own iterator."""
        return self

    def __next__(self):
        """Return the next line that matches, with its line ending intact.

        Raises:
            StopIteration: when the underlying file is exhausted.
        """
        while True:
            # next() on the file raises StopIteration at end of file, which
            # propagates and ends the iteration.
            r = next(self.f)
            # The stop marker does not depend on which start marker is being
            # tried, so test it once per line instead of once per marker.
            if self.m2 in r:
                continue
            if any(m1 in r for m1 in self.m1s):
                return r
# Filter every .txt file found in the source directory into a file of the
# same name in the destination directory, keeping only the rows that
# Collector yields.
src_path = "e:/teste/Filtrados/"
dst_path = "e:/teste/FiltradosFinal/"
filelist = (fn for fn in os.listdir(src_path) if fn.endswith(".txt"))
for x in filelist:
    print(f"Processing file {x}")
    source_file = os.path.join(src_path, x)
    target_file = os.path.join(dst_path, x)
    # The output file is opened first, then the collector opens the input.
    with open(target_file, 'w', encoding='utf-8', errors='ignore') as f:
        with Collector(source_file, '1234', 'abcd') as c:
            # Rows keep their own line endings, so they are written verbatim.
            f.writelines(c)
# Concatenate every filtered .txt file into one combined output file.
# Files are handled in binary mode so their bytes are copied unchanged.
read_files = glob.glob("e:/teste/FiltradosFinal//*.txt")
with open("e:/teste/teste.txt", "wb") as outfile:
    for f in read_files:
        print("Combinando arquivos")
        with open(f, "rb") as infile:
            # The snippet ended on the `with` line with no body; appending
            # the file's contents is what the surrounding code implies.
            outfile.write(infile.read())
我沒有測試代碼,因為您沒有提供任何方便的輸入或所需的輸出,但這似乎可以滿足您的要求。
我建議使用有意義的名稱而不是 m1 和 m2,這樣您就可以在代碼變大時調試自己的代碼(更不用說讓其他人閱讀它了)。
添加回答
舉報