"""Find Chinese postal addresses in a text file.

Matches addresses in the text and can merge two lines into one: when the
upper half of an address is on line n and the lower half is on line n+1,
the whole address is still matched and the reported line number is n.
"""
import re

# Default input file read by main().
DATA_FILE = "数据文件.txt"

# An address starts with two characters followed by "省" and runs to the last
# recognised suffix within the allowed distance.  Alternatives are tried in
# this order, so the bare "..省" form is only a fallback.
_ADDRESS_RE = re.compile(
    r".{2}省.{1,30}房|.{2}省.{1,30}号|.{2}省.{1,30}院|.{2}省.{1,30}校|"
    r".{2}省.{1,30}村|.{2}省.{1,30}区|.{2}省.{1,20}县|.{2}省.{1,10}市|"
    r".{2}省"
)

# Used on matches that ran past a full stop or into a parenthesis.  Group 1
# keeps the text up to the last suffix that precedes a "。"; the second
# alternative (text ending in a literal "(") contributes an empty string.
_CLEANUP_RE = re.compile(
    r"(.*房|.*号|.*院|.*校|.*村|.*区|.*县|.*市|.*省).*。|.*\("
)


def _trim_address(raw):
    """Return *raw* cut back to its address part, or "" if nothing is left."""
    if "。" in raw or "(" in raw:
        return "".join(_CLEANUP_RE.findall(raw))
    return raw


def find_addresses(lines):
    """Yield ``(line_number, address)`` pairs found in *lines*.

    Each line is searched together with the line before it, so an address
    broken across a line boundary is matched as a whole.  All matches found
    in one two-line window are concatenated into a single string.

    Args:
        lines: iterable of text lines, e.g. an open text file.

    Yields:
        Tuples of the 0-based index of the current line (which equals the
        1-based number of the window's first line) and the address text.
    """
    previous_line = ""
    previous_raw = ""
    for index, line in enumerate(lines):
        # Drop the previous line's terminator so "." can match across the
        # join between the two lines.
        window = previous_line.rstrip("\r\n") + line
        raw = "".join(_ADDRESS_RE.findall(window))
        previous_line = line
        # NOTE(review): index 0 is never reported, so an address that appears
        # only on the first line is suppressed (same as the original script).
        # A window whose raw match equals the previous window's is the same
        # address seen again one line later, so it is skipped.
        if raw and index > 0 and raw != previous_raw:
            address = _trim_address(raw)
            if address:
                yield index, address
        # Compare against the untrimmed text next time round.
        previous_raw = raw


def main(path=DATA_FILE, encoding=None):
    """Print every address found in the file at *path*.

    Args:
        path: file to scan; defaults to ``DATA_FILE``.
        encoding: text encoding passed to ``open``; ``None`` keeps the
            platform default, as the original script did.
    """
    with open(path, encoding=encoding) as handle:
        for line_number, address in find_addresses(handle):
            print(line_number, "Address", address)


if __name__ == "__main__":
    main()