本人是自學 Python 的新手,主要接觸生物資訊(生信)方面。現在正在做的任務輸出結果非常慢,請各位大佬幫我看一下代碼有沒有問題。
如果沒有問題,可以怎麼提高這段代碼的運行速度?非常感謝!
問題描述:
[['ARG|400-C', '24.961', '47.658', '46.557'], ['ARG|400-O', '23.763', '47.387', '46.471'], ['ARG|400-CB', '25.038', '49.079', '48.617'], ['ARG|400-CG', '25.387', '49.190', '50.096'], ['ARG|400-CD', '24.513', '50.230', '50.789'], ['ARG|400-NE', '24.794', '51.592', '50.341'], ['ARG|400-CZ', '25.802', '52.335', '50.790'], ['ARG|400-NH1', '26.631', '51.851', '51.707'], ['ARG|400-NH2', '25.983', '53.562', '50.320'], ['GLU|401-N', '25.744', '47.804', '45.492'], ['GLU|401-CA', '25.224', '47.639', '44.139'], ['GLU|401-C', '24.889', '46.173', '43.874'], ['GLU|401-O', '25.182', '45.334', '44.752'], ['GLU|401-CB', '26.251', '48.128', '43.112'], ['GLU|401-CG', '26.469', '49.634', '43.114'], ['GLU|401-CD', '25.241', '50.398', '42.657'], ['GLU|401-OE1', '24.808', '50.191', '41.503'], ['GLU|401-OE2', '24.707', '51.205', '43.448']]
以這個串列中的元素為例:每一個子串列的三個數字元素表示一個原子的三維坐標;每一個子串列的 [0] 號元素中,'-' 之前的字串表示一個氨基酸,如 'ARG|400-C' 和 'ARG|400-O' 都屬於 'ARG|400' 這一個氨基酸;'-' 後面的字串表示一個原子,如 'ARG|400-C' 表示 'ARG|400' 這個氨基酸的 C 原子。我想計算某一個氨基酸的所有原子到另一個氨基酸的所有原子的三維空間距離的最小值,並對每一對氨基酸都進行同樣的計算,最後儲存為新的串列,每一項大概是這樣的格式:'氨基酸1 -> 氨基酸2 : 氨基酸1 的所有原子與氨基酸2 的所有原子之間距離的最小值'。
希望大佬們可以幫助我解決這個問題! 非常感謝!
from math import sqrt
from math import pow
import collections
from numba import jit
import numpy as np
# Read the raw structure file once; the context manager closes the handle as
# soon as its lines are in memory (the original never closed it).
with open('/Users/xrk.study/Desktop/1iir.txt', 'r') as f:
    r = f.readlines()

# NOTE(review): this output handle is opened (which creates/truncates the
# file) but is never written to or closed anywhere in the visible script.
# It is left open in case later code relies on `f1` -- confirm and close it.
f1 = open('/Users/xrk.study/Desktop/1.txt', 'w')

# Split every line of the residue file into whitespace-separated fields.
with open('/Users/xrk.study/Desktop/1iir_res.txt', 'r') as f2:
    l = [line.split() for line in f2]

# The third field, with any ':' removed, is the residue key that is later
# compared against "<field 3>|<field 5>" of the ATOM records. Blank lines
# split into an empty list and are skipped instead of raising IndexError.
lv = [fields[2].replace(':', '') for fields in l if fields]
print(lv[:10])

# Keep only the ATOM records, each split into whitespace-separated fields.
p = [line.split() for line in r if line.startswith('ATOM')]
#print(p[:10])
def x(list, str):
    """Collect the formatted atom entries that belong to one residue.

    A row belongs to the residue when ``row[3] + '|' + row[5]`` equals
    ``str``. Each matching row is rendered as
    ``"<residue>-<row[2]>:<row[6]>,<row[7]>,<row[8]>"``.

    :param list: sequence of rows; every field that is used must be a string
    :param str: residue key to select, e.g. ``'ARG|400'``
    :return: list of formatted strings, in the order the rows appear
    """
    matches = []
    for row in list:
        residue_key = row[3] + '|' + row[5]
        if residue_key != str:
            continue
        coordinates = row[6] + ',' + row[7] + ',' + row[8]
        matches.append(residue_key + '-' + row[2] + ':' + coordinates)
    return matches
# One list of formatted atom strings per residue key in lv.
k1 = [x(p, residue_key) for residue_key in lv]
print(k1[:10])

# Next, keep reshaping the data: flatten the per-residue lists and turn each
# "label:x,y,z" string into a [label, x, y, z] list.
tag1 = []
for residue_atoms in k1:
    tag1.extend(entry.replace(':', ',').split(',') for entry in residue_atoms)
print(tag1)
def math_solove(list1, list2):
    """Return the Euclidean distance between two atom records.

    Elements 1, 2 and 3 of each record are converted with ``float`` and used
    as the x, y and z coordinates; element 0 is not read.

    :param list1: first record, e.g. ``['ARG|400-C', '24.961', '47.658', '46.557']``
    :param list2: second record, same layout
    :return: the distance as a float
    """
    dx = float(list1[1]) - float(list2[1])
    dy = float(list1[2]) - float(list2[2])
    dz = float(list1[3]) - float(list2[3])
    return sqrt(pow(dx, 2) + pow(dy, 2) + pow(dz, 2))
# Square matrix with ones on the main diagonal, one row and one column per
# entry of lv. NOTE(review): `a` is not referenced again in the visible script.
a = np.eye(N=len(lv), M=len(lv))
def fl(list, str1, str2):
    """Return the smallest distance between the atoms of two residues.

    Every record in ``list`` looks like ``[label, x, y, z]``; the part of
    ``label`` before the first ``'-'`` is the residue key. A label without a
    ``'-'`` is treated as being its own residue key. The records of ``str1``
    and ``str2`` are collected in a single pass and only those two groups are
    compared, instead of testing every record against every other record.

    :param list: sequence of ``[label, x, y, z]`` records (coordinates may be
        strings; they are converted with ``float`` once per record)
    :param str1: residue key of the first residue, e.g. ``'ARG|400'``
    :param str2: residue key of the second residue
    :return: the minimum Euclidean distance as a float
    :raises ValueError: if ``str1 == str2``, if either residue has no record
        in ``list``, or if a coordinate cannot be converted to float
    """
    if str1 == str2:
        raise ValueError('fl() needs two different residues, got {!r} twice'.format(str1))

    first_atoms = []
    second_atoms = []
    for record in list:
        residue_key = record[0].partition('-')[0]
        if residue_key == str1:
            first_atoms.append((float(record[1]), float(record[2]), float(record[3])))
        elif residue_key == str2:
            second_atoms.append((float(record[1]), float(record[2]), float(record[3])))

    if not first_atoms or not second_atoms:
        raise ValueError('no atoms found for {!r} and/or {!r}'.format(str1, str2))

    # sqrt is monotonic, so take the minimum of the squared distances and
    # apply a single sqrt at the end.
    smallest_squared = min(
        (ax - bx) ** 2 + (ay - by) ** 2 + (az - bz) ** 2
        for ax, ay, az in first_atoms
        for bx, by, bz in second_atoms
    )
    return sqrt(smallest_squared)
# Bucket the atom records by residue key once. The key is the part of the
# label before the first '-', the same rule fl() uses to match records.
# Passing fl() only the two relevant buckets yields the same minimum as
# passing the whole of tag1, without rescanning every atom for every pair.
atoms_by_residue = {}
for record in tag1:
    residue_key = record[0][0:record[0].index('-')]
    atoms_by_residue.setdefault(residue_key, []).append(record)

u1 = []
for i, first in enumerate(lv):
    first_atoms = atoms_by_residue.get(first, [])
    for j, second in enumerate(lv):
        if i == j:
            continue
        candidates = first_atoms + atoms_by_residue.get(second, [])
        u1.append('{}->{} : {}'.format(first, second, fl(candidates, first, second)))
print(u1)
uj5u.com熱心網友回復:
import time
from math import sqrt
import pandas as pd
def init():
    """Build the hard-coded sample data, grouped by residue.

    Each sample row is ``[label, x, y, z]`` where ``label`` is
    ``"<residue>-<atom>"``. Rows are grouped by residue into
    ``{'locs': [[x, y, z], ...], 'atom_names': [atom, ...]}``, every group is
    printed, and the grouping is returned.

    :return: dict mapping residue name to its ``locs`` / ``atom_names`` lists
    """
    rows = [
        ['ARG|400-O', '23.763', '47.387', '46.471'],
        ['ARG|400-C', '24.961', '47.658', '46.557'],
        ['ARG|400-CB', '25.038', '49.079', '48.617'],
        ['ARG|400-CG', '25.387', '49.190', '50.096'],
        ['ARG|400-CD', '24.513', '50.230', '50.789'],
        ['ARG|400-NE', '24.794', '51.592', '50.341'],
        ['ARG|400-CZ', '25.802', '52.335', '50.790'],
        ['ARG|400-NH1', '26.631', '51.851', '51.707'],
        ['ARG|400-NH2', '25.983', '53.562', '50.320'],
        ['GLU|401-N', '25.744', '47.804', '45.492'],
        ['GLU|401-CA', '25.224', '47.639', '44.139'],
        ['GLU|401-C', '24.889', '46.173', '43.874'],
        ['GLU|401-O', '25.182', '45.334', '44.752'],
        ['GLU|401-CB', '26.251', '48.128', '43.112'],
        ['GLU|401-CG', '26.469', '49.634', '43.114'],
        ['GLU|401-CD', '25.241', '50.398', '42.657'],
        ['GLU|401-OE1', '24.808', '50.191', '41.503'],
        ['GLU|401-OE2', '24.707', '51.205', '43.448'],
    ]
    grouped = {}
    for label, *coordinates in rows:
        residue, atom = label.split('-')
        entry = grouped.setdefault(residue, {'locs': [], 'atom_names': []})
        entry['locs'].append(coordinates)
        entry['atom_names'].append(atom)
    for residue, entry in grouped.items():
        print(residue, entry)
    return grouped
class ClassAnjs():
    """One amino-acid residue: its name plus its atoms' names and coordinates."""

    def __init__(self, locs, name, atom_names):
        '''
        Initialise an amino-acid object.

        :param locs: list of ``[x, y, z]`` coordinates, one per atom
        :param name: name of the amino acid
        :param atom_names: list of atom names, parallel to ``locs``
        '''
        self.name = name
        # NOTE: the attribute name is misspelled ("otom"); it is kept so that
        # existing code reading ``otom_names`` keeps working.
        self.otom_names = atom_names
        self.locs = locs

    @property
    def atom_names(self):
        """Correctly spelled, read-only alias of ``otom_names``."""
        return self.otom_names

    def calc_distance(self, obj):
        '''
        Find the closest pair of atoms between this amino acid and ``obj``
        and print the result.

        :param obj: the other ClassAnjs instance
        :return: None
        '''
        min_value, (index1, index2) = self._calc(self.locs, obj.locs)
        self._print_result(self, obj, min_value, index1, index2)

    @classmethod
    def calc(cls, obj1, obj2):
        '''
        Find the closest pair of atoms between ``obj1`` and ``obj2`` and print
        the result.

        :param obj1: first ClassAnjs instance
        :param obj2: second ClassAnjs instance
        :return: None
        '''
        # _calc is an instance method that never reads self, so the class
        # itself is passed in the self slot.
        min_value, (index1, index2) = cls._calc(cls, obj1.locs, obj2.locs)
        cls._print_result(obj1, obj2, min_value, index1, index2)

    @staticmethod
    def _print_result(obj1, obj2, min_value, index1, index2):
        """Print the closest-pair report shared by calc_distance and calc."""
        print('計算完畢:')
        print('{}氨基酸中索引是{}的{}原子->{}氨基酸中索引是{}的{}原子之間距離最近,值為:{}'.format(obj1.name,
                                                                   index1,
                                                                   obj1.otom_names[index1],
                                                                   obj2.name,
                                                                   index2,
                                                                   obj2.otom_names[index2],
                                                                   min_value))

    def _calc(self, loc1, loc2):
        '''
        Compute the shortest distance between any point of ``loc1`` and any
        point of ``loc2``.

        All pairwise squared distances are computed in one broadcast NumPy
        expression instead of a per-row pandas loop. On ties, the pair with
        the smallest ``loc2`` index wins, then the smallest ``loc1`` index.

        :param loc1: non-empty list of ``[x, y, z]`` (numbers or numeric strings)
        :param loc2: non-empty list of ``[x, y, z]`` (numbers or numeric strings)
        :return: ``(min_distance, (index_in_loc1, index_in_loc2))``
        :raises ValueError: if either argument is empty, is not a list of
            3-element coordinates, or holds a non-numeric value
        '''
        # Local import: this script's header imports pandas but not numpy.
        import numpy as np

        first = np.asarray(loc1, dtype=float)
        second = np.asarray(loc2, dtype=float)
        for points in (first, second):
            if points.ndim != 2 or points.shape[0] == 0 or points.shape[1] != 3:
                raise ValueError('expected a non-empty list of [x, y, z] coordinates')

        # squared[r, i] is the squared distance between loc2[r] and loc1[i].
        deltas = second[:, None, :] - first[None, :, :]
        squared = (deltas ** 2).sum(axis=2)

        # argmin returns the first minimum in row-major order, which gives the
        # tie-breaking described in the docstring.
        flat_index = int(squared.argmin())
        index2, index1 = divmod(flat_index, first.shape[0])
        return sqrt(float(squared[index2, index1])), (index1, index2)
if __name__ == '__main__':
    # Wrap every residue returned by init() in a ClassAnjs object.
    data = init()
    anjs_list = [
        ClassAnjs(locs=entry['locs'], name=residue, atom_names=entry['atom_names'])
        for residue, entry in data.items()
    ]
    anjs1, anjs2 = anjs_list[0], anjs_list[1]

    # Approach 1: the instance method, timed on its own.
    start_time = time.time()
    anjs1.calc_distance(anjs2)
    end_time = time.time()
    print('消耗時間:', end_time - start_time)

    # Approach 2: the class method, timed from the end of approach 1.
    ClassAnjs.calc(anjs1, anjs2)
    print('消耗時間:', time.time() - end_time)
'''
計算完畢:
GLU|401氨基酸中索引是0的N原子->ARG|400氨基酸中索引是1的C原子之間距離最近,值為:1.3298984923669972
消耗時間: 0.09094572067260742
計算完畢:
GLU|401氨基酸中索引是0的N原子->ARG|400氨基酸中索引是1的C原子之間距離最近,值為:1.3298984923669972
消耗時間: 0.07195687294006348
'''
uj5u.com熱心網友回復:
科學計算要提高代碼的速度,就用 numpy、pandas 之類的庫去執行計算,少用 for 迴圈,資料量大的時候會快很多;如果還嫌不夠快,就不要用 Python,改用 C 或 C++ 去實作計算部分。
uj5u.com熱心網友回復:
好的,謝謝!
uj5u.com熱心網友回復:
明白了,非常感謝!
轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/29939.html
上一篇:用R語言處理批間差時出現如下錯誤
