# 2022 Data Security Finals: data analysis script (only part of the functionality is implemented)

import re

# Input file scanned by main(); opened with the platform default encoding.
DATA_FILE = "数据文件.txt"
# NOTE: a surname list ("常用姓氏.txt") was referenced by a commented-out
# open() call in an earlier draft; nothing in this script reads it.

# An address starts with a two-character province name followed by "省" and,
# optionally, a bounded run of characters ending in one of the suffix
# characters below.  Alternatives are tried in this order, so the earliest
# listed suffix that can match wins.
#
# The pattern is assembled from adjacent raw-string literals.  A backslash
# line continuation inside a single raw string would leave the backslash,
# the newline and the indentation in the pattern, which makes the
# alternatives that start a source line impossible to match.
_ADDRESS_RE = re.compile(
    r'.{2}省.{1,30}房|'
    r'.{2}省.{1,30}号|.{2}省.{1,30}院|.{2}省.{1,30}校|'
    r'.{2}省.{1,30}村|.{2}省.{1,30}区|.{2}省.{1,20}县|.{2}省.{1,10}市|'
    r'.{2}省'
)

# Used when a candidate address ran across a "。" or "(": keep only the part
# up to an address suffix that is still followed by that punctuation.
# "(" sits inside a character class, so it needs no escaping.
_TRIM_RE = re.compile(r'(.*房|.*号|.*院|.*校|.*村|.*区|.*县|.*市|.*省).*[。(]')


def find_addresses(lines):
    """Yield ``(line_number, address)`` pairs found in an iterable of lines.

    Each line is searched together with the line before it (the previous
    line's trailing newline is removed first), so an address whose first
    half is on one line and whose second half is on the next is matched as
    a whole.

    ``line_number`` is the 0-based index of the line being processed when
    the address is reported, i.e. the 1-based number of the line before it.
    A candidate is reported only when it differs from the candidate found in
    the previous iteration, so an address that sits on a single line is not
    reported twice by the two overlapping windows.

    Args:
        lines: iterable of text lines, normally an open text file.

    Yields:
        Tuples of ``(line_number, address)``.
    """
    previous_line = ""
    previous_candidate = ""
    for index, line in enumerate(lines):
        window = previous_line.rstrip("\n") + line
        candidate = "".join(_ADDRESS_RE.findall(window))

        is_new = candidate != previous_candidate
        # Always remember the raw candidate, even when nothing is reported
        # below, so the duplicate check never compares against stale state.
        previous_line = line
        previous_candidate = candidate

        # NOTE(review): the very first line is never reported on its own,
        # as in the original script -- presumably a header line; confirm.
        if not candidate or index == 0 or not is_new:
            continue

        address = candidate
        if "。" in candidate or "(" in candidate:
            address = "".join(_TRIM_RE.findall(candidate))
        if address:
            yield index, address


def main():
    """Print every address found in DATA_FILE as ``<line> Address <text>``."""
    # The context manager closes the file even if scanning raises.
    with open(DATA_FILE) as data_file:
        for line_number, address in find_addresses(data_file):
            print(line_number, "Address", address)


if __name__ == "__main__":
    main()

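# Implemented functionality: matches addresses in the text and can merge two
# lines into one. That is, when the first half of an address is on line n and
# the second half is on line n+1, this program matches the complete address
# and reports line n as its line number.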