import turtle
# Maximum number of top-ranked words printed and charted.
count = 10
# Frequencies of the charted words, appended by main(); parallel to `words`.
data = []
# The charted words themselves, appended by main(); parallel to `data`.
words = []
# Vertical scale: turtle units of bar height per occurrence of a word.
yScale = 2
# Horizontal scale: turtle units between the centres of adjacent bars.
xScale = 30
def drawLine(t, x1, y1, x2, y2):
    """Draw a straight segment from (x1, y1) to (x2, y2) with turtle ``t``.

    The pen is lifted while moving to the start point so that nothing is
    drawn on the way there; it is left down at (x2, y2) on return.
    """
    t.penup()
    t.goto(x1, y1)
    t.pendown()
    t.goto(x2, y2)
def drawText(t, x, y, text):
    """Write ``text`` at position (x, y) with turtle ``t``.

    The pen is lifted while moving to (x, y) so that no line is drawn on the
    way there; it is put down again before ``t.write(text)`` is called.
    """
    t.penup()
    t.goto(x, y)
    t.pendown()
    t.write(text)
def drawGraph(t):
    """Draw the axes, the per-bar labels and the bars of the frequency chart.

    Reads the module-level ``words`` and ``data`` lists (at most ``count``
    entries of each) together with the ``xScale`` / ``yScale`` factors.
    Fewer than ``count`` entries is fine: only the available word/frequency
    pairs are labelled.
    """
    # Axes: horizontal from the origin to x=360, vertical from y=300 down to
    # the origin.
    drawLine(t, 0, 0, 360, 0)
    drawLine(t, 0, 300, 0, 0)

    # Slots are numbered from 1 so the first bar does not sit on the y axis.
    shown = zip(words[:count], data[:count])
    for slot, (word, freq) in enumerate(shown, start=1):
        label_x = slot * xScale - 4
        # Word below the x axis, its frequency just above the top of the bar.
        drawText(t, label_x, -20, word)
        drawText(t, label_x, freq * yScale + 10, freq)

    # Bars are drawn once, after all labels.
    drawBars(t)
def drawRectangle(t, x, y):
    """Outline one bar of the chart with turtle ``t``.

    ``x`` is the bar's slot number and ``y`` its unscaled value; they are
    multiplied by the module-level ``xScale`` and ``yScale`` to obtain the
    bar's centre and height. The bar is 10 units wide and stands on y=0.
    """
    centre = x * xScale
    top = y * yScale
    left, right = centre - 5, centre + 5

    # Left edge, top edge, right edge, then the base back to the start.
    drawLine(t, left, 0, left, top)
    drawLine(t, left, top, right, top)
    drawLine(t, right, top, right, 0)
    drawLine(t, right, 0, left, 0)
def drawBars(t):
    """Draw one bar per entry of the module-level ``data`` list.

    At most ``count`` bars are drawn; when ``data`` holds fewer entries only
    those are drawn. Slots are numbered from 1.
    """
    for slot, freq in enumerate(data[:count], start=1):
        drawRectangle(t, slot, freq)
# Count word occurrences.
def processsLine(line, wordCounts):
    """Add the words of ``line`` to the running tally in ``wordCounts``.

    Punctuation is first replaced by spaces via replacePunctuations(), then
    the line is split on whitespace. ``wordCounts`` (word -> occurrence
    count) is updated in place; nothing is returned.
    """
    cleaned = replacePunctuations(line)
    # Named `token` to avoid shadowing the module-level `words` list.
    for token in cleaned.split():
        wordCounts[token] = wordCounts.get(token, 0) + 1
# Replace punctuation with spaces.
# The double quote is escaped so it is really part of the set; written as ""
# inside the literal it would only split the literal in two and the quote
# character itself would never be replaced.
_PUNCTUATION = "~!@#$%^&*()_+-=[]{};:'|\",./?<>"
_PUNCTUATION_TO_SPACE = str.maketrans(_PUNCTUATION, " " * len(_PUNCTUATION))


def replacePunctuations(line):
    """Return ``line`` with every punctuation character replaced by a space.

    Each replaced character becomes exactly one space, so the result has the
    same length as the input. Characters outside the punctuation set
    (letters, digits, whitespace, ...) are left untouched.
    """
    return line.translate(_PUNCTUATION_TO_SPACE)
def main(filename=r'C:\Users\Administrator\Desktop\news.txt'):
    """Count word frequencies in a text file and chart the top ``count`` words.

    ``filename`` is the path of a UTF-8 text file; it defaults to the path the
    script was originally hard-wired to. Each line is lower-cased and tallied
    with processsLine(). The most frequent words (at most ``count``, fewer if
    the file has fewer distinct words) are printed as ``word<TAB>frequency``,
    appended to the module-level ``words`` / ``data`` lists, and drawn as a
    bar chart in a turtle window. The call ends in ``turtle.done()``, which
    enters the turtle event loop.

    Raises whatever ``open`` raises when the file cannot be opened.
    """
    # Process the file; the with-block closes it even if reading fails.
    wordCounts = {}
    with open(filename, 'r', encoding='utf-8') as infile:
        for line in infile:
            processsLine(line.lower(), wordCounts)

    # Rank as (frequency, word) pairs so that sorting orders by frequency
    # first and by word second; reverse=True puts the most frequent first.
    ranked = sorted(
        ((freq, word) for word, freq in wordCounts.items()),
        reverse=True,
    )

    # Print and record the top entries. Slicing (rather than counting indices
    # down from the end) never runs past the start of a short list.
    for freq, word in ranked[:count]:
        print(word + "\t" + str(freq))
        data.append(freq)
        words.append(word)

    # Draw the bar chart.
    turtle.title("词频结果柱状图")
    turtle.setup(900, 750, 100, 100)
    t = turtle.Turtle()
    t.hideturtle()
    t.width(3)
    drawGraph(t)
    turtle.done()
# Run the word-frequency chart only when executed as a script, not on import.
if __name__ == '__main__':
    main()
# Source note: this script was found online; it appears to count English word
# frequencies in a plain-text (Notepad) file.