完善了运算

This commit is contained in:
mxr612 2024-05-15 01:17:50 +08:00
parent 2a2e5f9d27
commit aa956261b8
3 changed files with 18 additions and 26 deletions

1
.gitignore vendored
View File

@@ -1,3 +1,4 @@
__pycache__/* __pycache__/*
Temp/ Temp/
Scales/ Scales/
Work/

View File

@@ -40,7 +40,7 @@ def calc_similarity(scale):
for i in scale: for i in scale:
item.append(i) item.append(i)
vec.append(client.embeddings.create( vec.append(client.embeddings.create(
input=scale[i], model="text-embedding-3-small" # nomic-embed-text text-embedding-3-small input=scale[i], model="text-embedding-3-large" # nomic-embed-text text-embedding-3-small
).data[0].embedding) ).data[0].embedding)
simi=cosine_similarity(vec) simi=cosine_similarity(vec)
que=[] que=[]
@@ -73,24 +73,17 @@ def make_data():
for j in range(1,20): for j in range(1,20):
s += ',' + str(random.randint(0,4)) s += ',' + str(random.randint(0,4))
s+='\n' s+='\n'
with open("Temp/data.csv","w") as data: with open("data.csv","w") as data:
data.write(s) data.write(s)
def corelation(sort:bool=True): def corelation():
data = pandas.read_csv("data.csv") data = pandas.read_csv("Work/data.csv")
que=[] que={}
for i in data: for i in data:
for j in data: for j in data:
try: try:
if(i != j): que[i,j]=data[i].corr(data[j])
# que[i,j]["psr"]=data[i].corr(data[j])
que.append({"from":j,"to":i,"psr":data[i].corr(data[j])})
else:
pass
except: except:
pass pass
if sort:
return sorted(que,key = lambda t : abs(t["psr"]), reverse=True)
else:
return que return que

20
main.py
View File

@@ -10,23 +10,21 @@ import numpy
similarity = file_load.similarity() similarity = file_load.similarity()
corelation = file_load.corelation() corelation = file_load.corelation()
table = {}
for i in corelation:
table[i["from"],i["to"]]=i["psr"]
x=[] x=[]
y=[] y=[]
s="similarity, corelation\n"
for i in similarity: for i in similarity:
x.append(abs(table[i["from"],i["to"]])) try:
y.append(i["similarity"]) s+=str(i["similarity"])+','+str(corelation[i["from"],i["to"]])+'\n'
x.append(i["similarity"])
y.append(corelation[i["from"],i["to"]])
except:
pass
print(numpy.corrcoef(x,y)[0,1]) print(numpy.corrcoef(x,y)[0,1])
s="similarity, corelation\n" with open("Work/point.csv","w") as point:
for i in similarity:
s+=str(i["similarity"])+','+str(table[i["from"],i["to"]])+'\n'
with open("Temp/point.csv","w") as point:
point.write(s) point.write(s)