如何解决在函数中定义字典时 Python 出现的内存泄漏
我有一个程序,其中某个函数需要打开很大的 pickle 文件(几 GB),遍历读出的字典(dict),并返回它的一个局部视图(几个元素)。奇怪的是,该函数加载的大量数据在函数返回后仍然保留在内存中。所以我用下面的代码做了一些测试:
import numpy as np
def test_mem_1():
data = np.random.random((2**27)) #1 GB
input("Data generated,awaiting input to continue")
return 4
def test_mem_2():
keys = list(range(100000))
lvls = list(range(10))
data = {}
for k in keys:
data[k] = {}
for lvl in lvls:
data[k][lvl] = np.random.random(100) #1'000'000 X 8 = 800 MB
input("Data generated,awaiting input to continue")
data = None
return 4
if __name__ == "__main__":
    # Run both experiments back to back, pausing after each one so the
    # process memory can be inspected from outside.
    test_mem_1()
    input("Tested mem 1,continue to test mem 2")
    # Memory usage falls from 995 MB inside test_mem_2 to only 855 MB once
    # it has returned.
    test_mem_2()
    input("Finished")
    # exit() is a helper meant for the interactive shell; raising SystemExit
    # ends the script the same way without depending on it.
    raise SystemExit
运行此实验时,第一个测试分配 1 GB,然后在函数返回后立即释放此数据。同时,第二个测试(使用 dict)首先分配 995 MB,然后,当函数返回时,只有 140 MB 被释放(导致在 test_mem_2 之后的内存占用为 855 MB)。 这里发生了什么?我怎样才能释放这个内存?
附言
我尝试用几种方法删除 test_mem_2 中的数据:不做任何事情,使用“del”,分配给新的字典,以及(如本例中)将引用分配给 None
解决方法
以下回答是在评论区讨论之后给出的。
内存管理由 Python 自身通过垃圾收集来处理,通常你根本不需要去碰它。Python 中的垃圾收集是自动进行的;除非确实有充分的理由去干预它,否则不要动它。
但是,您可以强制进行垃圾回收,这在处理资源有限的系统时会很有用。
我把您的代码与一个用于获取内存使用情况的函数结合了起来(这个函数是我厚着脸皮从这个很棒的回答 https://stackoverflow.com/a/45679009/9267296 里“偷”来的),并实现了最基本的垃圾收集...
我多次运行了 loopity(),到目前为止还没能让它崩溃。
请注意,我确实在 test_mem_1() 中添加了 data = None。
文件:memleak.py
import numpy as np
import sys
import gc
import tracemalloc
import linecache
import os
# Start tracing Python memory allocations at import time; the
# tracemalloc.take_snapshot() calls further down need tracing to be active.
tracemalloc.start()
def display_top(snapshot, key_type='lineno', limit=3, where=''):
    """Print the largest allocation sites recorded in a tracemalloc snapshot.

    Adapted from https://stackoverflow.com/a/45679009/9267296, with all
    old-style string formatting replaced by f-strings.

    Args:
        snapshot: The ``tracemalloc.Snapshot`` to summarise.
        key_type: Grouping key handed to ``Snapshot.statistics``.
        limit: How many of the top entries are listed individually; the
            remaining ones are folded into a single "other" line.
        where: Optional label printed in a banner above the statistics.
            When empty, only a single separator line is printed.

    Returns:
        None. Everything is written to standard output.
    """
    separator = '======================================================================'
    print(separator)
    if where:
        print(f'Printing stats:\n {where}')
        print(separator)

    # Exclude traces from the importlib bootstrap and from unknown files.
    snapshot = snapshot.filter_traces((
        tracemalloc.Filter(False, '<frozen importlib._bootstrap>'),
        tracemalloc.Filter(False, '<unknown>'),
    ))
    top_stats = snapshot.statistics(key_type)

    print(f'Top {limit} lines')
    for index, stat in enumerate(top_stats[:limit], 1):
        frame = stat.traceback[0]
        # replace '/path/to/module/file.py' with 'module/file.py'
        filename = os.sep.join(frame.filename.split(os.sep)[-2:])
        print(f'#{index}: {filename}:{frame.lineno}: {stat.size / 1024:.1f} KiB')
        line = linecache.getline(frame.filename, frame.lineno).strip()
        if line:
            print(f' {line}')

    # Everything beyond the first `limit` entries is summarised in one line.
    other = top_stats[limit:]
    if other:
        size = sum(stat.size for stat in other)
        print(f'{len(other)} other: {size / 1024:.1f} KiB')

    total = sum(stat.size for stat in top_stats)
    print()
    print(f'=====> Total allocated size: {total / 1024:.1f} KiB')
    print()
def test_mem_1():
    """Allocate a 1 GB NumPy array and report traced memory at each step.

    A snapshot is printed at function entry, after the allocation, after the
    reference is dropped, and after an explicit garbage collection. The
    function waits for user input while the array is still referenced.

    Returns:
        The constant ``4``.
    """
    display_top(tracemalloc.take_snapshot(), where='test_mem_1: start')
    data = np.random.random((2**27)) #1 GB
    display_top(tracemalloc.take_snapshot(), where='test_mem_1: data generated')
    input('Data generated,awaiting input to continue')
    # Drop the only reference to the array before the next snapshot.
    data = None
    display_top(tracemalloc.take_snapshot(), where='test_mem_1: data == None')
    # Force a full collection so the final snapshot is taken after it.
    gc.collect()
    display_top(tracemalloc.take_snapshot(), where='test_mem_1: gc collected')
    return 4
def test_mem_2():
    """Build a nested dict of small NumPy arrays and report traced memory.

    A snapshot is printed at function entry, after the key lists are built,
    after the dictionary is filled, after the reference is dropped, and
    after an explicit garbage collection. The function waits for user input
    while the dictionary is still referenced.

    Returns:
        The constant ``4``.
    """
    display_top(tracemalloc.take_snapshot(), where='test_mem_2: start')
    keys = list(range(100000))
    lvls = list(range(10))
    display_top(tracemalloc.take_snapshot(), where='test_mem_2: lists generated')
    data = {}
    for k in keys:
        data[k] = {}
        for lvl in lvls:
            # 100'000 keys X 10 levels = 1'000'000 arrays of 100 values each.
            data[k][lvl] = np.random.random(100) #1'000'000 X 8 = 800 MB
    display_top(tracemalloc.take_snapshot(), where='test_mem_2: np data generated')
    input('Data generated,awaiting input to continue')
    # Drop the only reference to the nested dict before the next snapshot.
    data = None
    display_top(tracemalloc.take_snapshot(), where='test_mem_2: data == None')
    # Force a full collection so the final snapshot is taken after it.
    gc.collect()
    display_top(tracemalloc.take_snapshot(), where='test_mem_2: gc collected')
    return 4
def loopity():
    """Run both memory experiments repeatedly, printing snapshots in between.

    Each pass runs ``test_mem_1`` and then ``test_mem_2`` with a pause
    between them. The loop repeats for as long as the user answers the
    final 'Finished' prompt with an empty line; any other input stops it.
    """
    inp = ''
    while inp == '':
        display_top(tracemalloc.take_snapshot(), where='loopity: start')
        test_mem_1()
        display_top(tracemalloc.take_snapshot(), where='loopity: test_mem_1 done')
        input('Tested mem 1,continue to test mem 2')
        test_mem_2()
        display_top(tracemalloc.take_snapshot(), where='loopity: test_mem_2 done')
        inp = input('Finished')
if __name__ == '__main__':
    # Only start the interactive experiment loop when run as a script.
    loopity()
这是运行 python 3.8.10 的 Windows 机器的输出(别问):
======================================================================
Printing stats:
loopity: start
======================================================================
Top 3 lines
#1: .\memleak.py:93: 0.1 KiB
def loopity():
#2: .\memleak.py:69: 0.1 KiB
def test_mem_2():
#3: .\memleak.py:53: 0.1 KiB
def test_mem_1():
1 other: 0.1 KiB
=====> Total allocated size: 0.5 KiB
======================================================================
Printing stats:
test_mem_1: start
======================================================================
Top 3 lines
#1: lib\linecache.py:137: 8.3 KiB
lines = fp.readlines()
#2: .\memleak.py:39: 1.2 KiB
line = linecache.getline(frame.filename,frame.lineno).strip()
#3: lib\tracemalloc.py:509: 1.2 KiB
statistics.sort(reverse=True,key=Statistic._sort_key)
59 other: 20.4 KiB
=====> Total allocated size: 31.1 KiB
======================================================================
Printing stats:
test_mem_1: data generated
======================================================================
Top 3 lines
#1: .\memleak.py:56: 1048576.3 KiB
data = np.random.random((2**27)) #1 GB
#2: lib\linecache.py:137: 63.9 KiB
lines = fp.readlines()
#3: lib\tracemalloc.py:65: 3.8 KiB
return (self.size,self.count,self.traceback)
59 other: 26.3 KiB
=====> Total allocated size: 1048670.3 KiB
Data generated,awaiting input to continue
======================================================================
Printing stats:
test_mem_1: data == None
======================================================================
Top 3 lines
#1: lib\linecache.py:137: 63.8 KiB
lines = fp.readlines()
#2: lib\tracemalloc.py:532: 5.8 KiB
traces = _get_traces()
#3: lib\tracemalloc.py:65: 3.9 KiB
return (self.size,self.count,self.traceback)
66 other: 25.2 KiB
=====> Total allocated size: 98.6 KiB
======================================================================
Printing stats:
test_mem_1: gc collected
======================================================================
Top 3 lines
#1: lib\linecache.py:137: 63.8 KiB
lines = fp.readlines()
#2: .\memleak.py:39: 1.2 KiB
line = linecache.getline(frame.filename,frame.lineno).strip()
#3: lib\tracemalloc.py:509: 1.2 KiB
statistics.sort(reverse=True,key=Statistic._sort_key)
56 other: 19.0 KiB
=====> Total allocated size: 85.3 KiB
======================================================================
Printing stats:
loopity: test_mem_1 done
======================================================================
Top 3 lines
#1: lib\linecache.py:137: 63.8 KiB
lines = fp.readlines()
#2: lib\tracemalloc.py:65: 3.7 KiB
return (self.size,self.count,self.traceback)
#3: lib\tracemalloc.py:185: 2.8 KiB
self._frames = tuple(reversed(frames))
70 other: 22.9 KiB
=====> Total allocated size: 93.2 KiB
Tested mem 1,continue to test mem 2
======================================================================
Printing stats:
test_mem_2: start
======================================================================
Top 3 lines
#1: lib\linecache.py:137: 63.8 KiB
lines = fp.readlines()
#2: lib\tracemalloc.py:65: 4.6 KiB
return (self.size,self.count,self.traceback)
#3: lib\tracemalloc.py:532: 4.5 KiB
traces = _get_traces()
71 other: 26.8 KiB
=====> Total allocated size: 99.7 KiB
======================================================================
Printing stats:
test_mem_2: lists generated
======================================================================
Top 3 lines
#1: .\memleak.py:72: 3508.7 KiB
keys = list(range(100000))
#2: lib\linecache.py:137: 63.8 KiB
lines = fp.readlines()
#3: lib\tracemalloc.py:532: 9.2 KiB
traces = _get_traces()
73 other: 31.6 KiB
=====> Total allocated size: 3613.3 KiB
======================================================================
Printing stats:
test_mem_2: np data generated
======================================================================
Top 3 lines
#1: .\memleak.py:80: 911719.1 KiB
data[k][lvl] = np.random.random(100) #1'000'000 X 8 = 800 MB
#2: .\memleak.py:78: 11370.0 KiB
data[k] = {}
#3: .\memleak.py:72: 3508.7 KiB
keys = list(range(100000))
71 other: 96.4 KiB
=====> Total allocated size: 926694.2 KiB
Data generated,awaiting input to continue
======================================================================
Printing stats:
test_mem_2: data == None
======================================================================
Top 3 lines
#1: .\memleak.py:72: 3508.7 KiB
keys = list(range(100000))
#2: lib\linecache.py:137: 63.8 KiB
lines = fp.readlines()
#3: .\memleak.py:80: 5.7 KiB
data[k][lvl] = np.random.random(100) #1'000'000 X 8 = 800 MB
75 other: 37.6 KiB
=====> Total allocated size: 3615.8 KiB
======================================================================
Printing stats:
test_mem_2: gc collected
======================================================================
Top 3 lines
#1: .\memleak.py:72: 3508.7 KiB
keys = list(range(100000))
#2: lib\linecache.py:137: 63.8 KiB
lines = fp.readlines()
#3: .\memleak.py:80: 5.5 KiB
data[k][lvl] = np.random.random(100) #1'000'000 X 8 = 800 MB
60 other: 22.0 KiB
=====> Total allocated size: 3600.0 KiB
======================================================================
Printing stats:
loopity: test_mem_2 done
======================================================================
Top 3 lines
#1: lib\linecache.py:137: 63.8 KiB
lines = fp.readlines()
#2: .\memleak.py:80: 5.5 KiB
data[k][lvl] = np.random.random(100) #1'000'000 X 8 = 800 MB
#3: lib\tracemalloc.py:65: 3.9 KiB
return (self.size,self.count,self.traceback)
73 other: 26.4 KiB
=====> Total allocated size: 99.7 KiB
Finished
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。