python topk
生成数组
百万元素
import numpy as np
inputs = [ np.random.random() for _ in range(1000000)]
计时器
from functools import wraps
import time
def func_timer(function):
@wraps(function)
def function_timer(*args, **kwargs):
print('[Function: {name} start]'.format(name = function.__name__))
t0 = time.time()
result = function(*args, **kwargs)
t1 = time.time()
print('[Function: {name} finished, spent time: {time:.2f}s]'.format(name = function.__name__,time = t1 - t0))
return result
return function_timer
算法
O ( N k l o g ( k ) ) O\big(Nklog(k)\big) O(Nklog(k))
@func_timer
def topk(inputs, k):
assert(len(inputs)>k and k>=0)
output = []
for number in inputs:
if len(output) < k:
output.append(number)
else:
# output = heapq.nsmallest(k, output) # 2.18s
# output = sorted(output)[:k] # 1.70s
output.sort() # 用时:1.07s
if number < output[0]:
continue
else:
output[0] = number
return output[::-1]
print(topk(inputs,100))
'''
[Function: topk start...]
[Function: topk finished, spent time: 1.07s]
[0.9999996850874242, 0.9999986732620001, 0.9999966250630344, 0.9999965512918702, 0.9999918270818978, 0.9999908482147212, 0.9999904061966568, 0.9999898873637068, 0.9999894591229037, 0.9999885915510304, 0.9999880191296788, 0.9999875751372768, 0.9999871692714755, 0.9999864191450215, 0.9999856657239012, 0.9999844443591834, 0.999983190441878, 0.9999820489498568, 0.9999810894337499, 0.9999798321198289, 0.9999786907049485, 0.9999783739745713, 0.9999778918753542, 0.9999776934842499, 0.9999766904951057, 0.9999763063439243, 0.9999721491854319, 0.9999718129550014, 0.9999690380974163, 0.9999667084475456, 0.9999655424378023, 0.9999652281775585, 0.9999627432692645, 0.9999609286879659, 0.9999595897335395, 0.9999577801424298, 0.9999577731076444, 0.9999577422439052, 0.9999573787572927, 0.9999545285610502, 0.9999536027375621, 0.9999517383688369, 0.9999505332989274, 0.9999467936557633, 0.9999462369888729, 0.9999434710302963, 0.9999428328375325, 0.9999415741653278, 0.9999395817201105, 0.9999383994805728, 0.9999372632501194, 0.9999353698560306, 0.9999347634017355, 0.9999342421914866, 0.9999333493394821, 0.9999327463812188, 0.9999326525157203, 0.9999323359413231, 0.9999319116353387, 0.9999318213132441, 0.9999311379877591, 0.9999278784463489, 0.999927795786208, 0.9999275307723007, 0.9999256814570631, 0.9999246819583866, 0.9999242658863041, 0.99992388656362, 0.9999218118780165, 0.9999214050287274, 0.9999213871879659, 0.9999212511033996, 0.9999200515736231, 0.9999186201014783, 0.9999177853796155, 0.9999141870066893, 0.9999122366576134, 0.9999109936676648, 0.9999096934306023, 0.9999096419907019, 0.9999089127419833, 0.99990882593983, 0.9999087135374473, 0.9999059676663873, 0.9999043718775731, 0.9999041573645822, 0.9999036853969006, 0.9999032218497446, 0.9999027069924663, 0.999901741000922, 0.9999016699195791, 0.9999009761556364, 0.9998998985072448, 0.9998987815252058, 0.9998975056695498, 0.9998950521471704, 0.9998944129310622, 0.9998942554330341, 0.9998936486586968, 0.9998927647795649]
'''
最强的还是 heapq
O ( N l o g ( k ) ) O\big(Nlog(k)\big) O(Nlog(k))
0.24s
import heapq
import random
import time
class TopkHeap(object):
def __init__(self, k):
self.k = k
self.data = []
def Push(self, elem):
if len(self.data) < self.k:
heapq.heappush(self.data, elem)
else:
topk_small = self.data[0]
if elem > topk_small:
heapq.heapreplace(self.data, elem)
def TopK(self):
return [x for x in reversed([heapq.heappop(self.data) for _ in range(len(self.data))])]
start=time.time()
th = TopkHeap(100)
for i in inputs:
th.Push(i)
print(th.TopK())
print("cost time",time.time()-start)
'''
[0.9999996850874242, 0.9999986732620001, 0.9999966250630344, 0.9999965512918702, 0.9999918270818978, 0.9999908482147212, 0.9999904061966568, 0.9999898873637068, 0.9999894591229037, 0.9999885915510304, 0.9999880191296788, 0.9999875751372768, 0.9999871692714755, 0.9999864191450215, 0.9999856657239012, 0.9999844443591834, 0.999983190441878, 0.9999820489498568, 0.9999810894337499, 0.9999798321198289, 0.9999786907049485, 0.9999783739745713, 0.9999778918753542, 0.9999776934842499, 0.9999766904951057, 0.9999763063439243, 0.9999721491854319, 0.9999718129550014, 0.9999690380974163, 0.9999667084475456, 0.9999655424378023, 0.9999652281775585, 0.9999627432692645, 0.9999609286879659, 0.9999595897335395, 0.9999577801424298, 0.9999577731076444, 0.9999577422439052, 0.9999573787572927, 0.9999545285610502, 0.9999536027375621, 0.9999517383688369, 0.9999505332989274, 0.9999467936557633, 0.9999462369888729, 0.9999434710302963, 0.9999428328375325, 0.9999415741653278, 0.9999395817201105, 0.9999383994805728, 0.9999372632501194, 0.9999353698560306, 0.9999347634017355, 0.9999342421914866, 0.9999333493394821, 0.9999327463812188, 0.9999326525157203, 0.9999323359413231, 0.9999319116353387, 0.9999318213132441, 0.9999311379877591, 0.9999278784463489, 0.999927795786208, 0.9999275307723007, 0.9999256814570631, 0.9999246819583866, 0.9999242658863041, 0.99992388656362, 0.9999218118780165, 0.9999214050287274, 0.9999213871879659, 0.9999212511033996, 0.9999200515736231, 0.9999186201014783, 0.9999177853796155, 0.9999141870066893, 0.9999122366576134, 0.9999109936676648, 0.9999096934306023, 0.9999096419907019, 0.9999089127419833, 0.99990882593983, 0.9999087135374473, 0.9999059676663873, 0.9999043718775731, 0.9999041573645822, 0.9999036853969006, 0.9999032218497446, 0.9999027069924663, 0.999901741000922, 0.9999016699195791, 0.9999009761556364, 0.9998998985072448, 0.9998987815252058, 0.9998975056695498, 0.9998950521471704, 0.9998944129310622, 0.9998942554330341, 0.9998936486586968, 0.9998927647795649]
cost time 0.24733829498291016
'''