数据挖掘之Apriori算法详解和Python实现代码分享(2)
def find_item_name(self):
    """Read the first line of ``self.filename`` and store it as ``self.item_name``.

    The first line is passed through ``self.deal_line`` (defined elsewhere;
    presumably it splits the raw line into fields -- confirm against its
    definition) and the result is assigned to ``self.item_name``.

    If the file is empty, ``self.item_name`` is left untouched.
    """
    with open(self.filename, 'r') as F:
        # Only the header is needed, so pull a single line instead of
        # loading the whole data file into memory with readlines().
        header = next(F, None)
    if header is not None:
        self.item_name = self.deal_line(header)
def sut(self, location):
    """Count the support of every position set in ``location``.

    Input:  a list of position sets, e.g. ``[[1, 2, 3], [2, 3, 4], ...]``.
    Output: a list of the same length holding, for each position set, the
            number of data rows in which every listed position equals 'T'.

    The first line of ``self.filename`` is skipped as the header. Every other
    line is passed through ``self.deal_line`` (defined elsewhere; it must
    return something indexable by the positions in ``location``).

    Side effect: ``self.line_num`` is set to the number of data rows, i.e.
    the line count excluding the header. It is 0 for an empty or header-only
    file.
    """
    support = [0] * len(location)
    # Start at 0 so an empty file yields line_num == 0 instead of failing
    # on a loop variable that was never bound.
    row_count = 0
    with open(self.filename, 'r') as F:
        # Iterate the file lazily; the rows are only needed one at a time.
        for index, line in enumerate(F):
            if index == 0:
                continue
            row_count = index
            # Extract the fields of this row.
            item_line = self.deal_line(line)
            for slot, positions in enumerate(location):
                # A row supports the set only if every position is 'T'.
                if all(item_line[pos] == 'T' for pos in positions):
                    support[slot] += 1
    self.line_num = row_count
    return support
def select(self, c):
    """Return the candidate position sets derived from ``self.location``.

    For every position set in ``self.location`` and every position in
    ``self.num``:

    * a position not yet in the set produces a new set with that position
      added;
    * a position already in the set re-emits the set itself, but only when
      the set has exactly ``c`` elements.

    Each candidate is sorted, the candidates are sorted, and duplicates are
    removed. The result is a sorted list of unique sorted lists.
    """
    import itertools

    candidates = []
    for itemset in self.location:
        already_full = len(itemset) == c
        for pos in self.num:
            if pos not in itemset:
                candidates.append([pos] + itemset)
            elif already_full:
                candidates.append(itemset)

    # De-duplicate the nested lists: after sorting, equal candidates are
    # adjacent, so groupby yields one key per distinct candidate.
    normalized = [sorted(candidate) for candidate in candidates]
    normalized.sort()
    return [key for key, _group in itertools.groupby(normalized)]
def del_location(self, support, location):
    """Drop candidate position sets that cannot be frequent.

    ``support`` and ``location`` are parallel lists (one support count per
    candidate position set). A candidate is kept only if:

    * its support is non-zero and at least
      ``self.line_num * self.min_support / 100`` (``min_support`` is used
      as a percentage of the row count), and
    * every subset obtained by removing one element from it is present in
      ``self.location`` (the Apriori pruning rule).

    Returns ``(support, location)`` as two new, filtered lists. The input
    lists are not modified.
    """
    # 100.0 forces true division: with integer operands Python 2 would floor
    # the threshold and let itemsets just below the minimum support through.
    threshold = self.line_num * self.min_support / 100.0

    # Build the lookup once; tuples make the position sets hashable.
    # Assumes the positions themselves are hashable (e.g. ints).
    known = set(tuple(previous) for previous in self.location)

    kept_support = []
    kept_location = []
    for count, candidate in zip(support, location):
        # Zero support is never frequent, even when the threshold is 0.
        if count == 0 or count < threshold:
            continue
        # All subsets with exactly one element removed.
        subsets = (
            candidate[:skip] + candidate[skip + 1:]
            for skip in range(len(candidate))
        )
        if all(tuple(subset) in known for subset in subsets):
            kept_support.append(count)
            kept_location.append(candidate)
    return kept_support, kept_location