2024-05-14 09:18:57 +00:00
|
|
|
|
import json
|
|
|
|
|
import os
|
|
|
|
|
import random
|
|
|
|
|
|
2024-05-14 14:46:44 +00:00
|
|
|
|
import pandas
|
|
|
|
|
import numpy
|
|
|
|
|
# import matplotlib
|
|
|
|
|
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
|
2024-05-14 09:38:35 +00:00
|
|
|
|
from openai import OpenAI
|
|
|
|
|
|
2024-05-16 11:17:19 +00:00
|
|
|
|
# Shared OpenAI-compatible client used by the chat and embedding calls below.
# The API key is read from the environment instead of being committed to the
# repository; the key that used to be hard-coded here should be treated as
# leaked and rotated. The endpoint can be overridden with OPENAI_BASE_URL and
# otherwise defaults to the gateway this script has always used.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
    base_url=os.environ.get("OPENAI_BASE_URL", "http://154.9.28.247:3000/v1/"),
)
|
2024-05-14 09:38:35 +00:00
|
|
|
|
|
2024-05-14 09:18:57 +00:00
|
|
|
|
def batch():
    """Merge the ``"item"`` mapping of every scale file into one dictionary.

    Every entry of the ``Scales`` directory is opened and parsed as JSON. The
    keys found under ``"item"`` are copied, with their values, into a single
    dictionary; when two files share a key, the file visited later wins.

    Returns:
        dict: all item keys of all scale files mapped to their values.
    """
    merged = {}
    for filename in os.listdir("Scales"):
        with open("Scales/" + filename, "r") as handle:
            content = json.load(handle)
        merged.update({key: content["item"][key] for key in content["item"]})
    return merged
|
|
|
|
|
|
2024-05-15 12:20:32 +00:00
|
|
|
|
def reverse_list():
    """Concatenate the ``"reverse"`` lists of every scale file.

    Every entry of the ``Scales`` directory is parsed as JSON and the list
    stored under its ``"reverse"`` key is appended to the result (presumably
    the names of reverse-scored items; confirm against the scale files).

    A file without a ``"reverse"`` key, or with a null one, contributes
    nothing instead of raising ``KeyError``. Files produced by ``old_type``
    only contain ``"item"``, so they would otherwise break this function.

    Returns:
        list: all ``"reverse"`` entries, in directory-listing order.
    """
    reversed_items = []
    for filename in os.listdir("Scales"):
        with open("Scales/" + filename, "r") as handle:
            content = json.load(handle)
        # Tolerate scales that declare no reverse-scored items.
        reversed_items.extend(content.get("reverse") or [])
    return reversed_items
|
|
|
|
|
|
2024-05-14 09:18:57 +00:00
|
|
|
|
def old_type(str):
    """Convert a legacy scale file in place to the ``{"item": {...}}`` layout.

    The file is read as a JSON sequence of objects. Each object's ``"name"``
    becomes a key and its ``"label"`` the value of a new ``"item"`` mapping,
    which then overwrites the original file.

    Args:
        str: path of the JSON file to convert; the file is rewritten.
    """
    with open(str, "r") as source:
        records = json.load(source)

    converted = {"item": {record["name"]: record["label"] for record in records}}

    with open(str, "w") as target:
        target.write(json.dumps(converted))
|
|
|
|
|
|
2024-05-16 11:17:19 +00:00
|
|
|
|
def liguistic(a:str,b:str):
    """Ask the chat model whether two sentences have opposite meanings.

    The system prompt (in Chinese) tells the model to answer strictly, with
    no extra explanation, and to return ``True`` when the two sentences are
    semantically opposite and ``False`` otherwise. The two sentences are sent
    as one user message separated by a newline. The raw reply is printed.

    Args:
        a: first sentence.
        b: second sentence.

    Returns:
        bool: whether the substring ``'True'`` occurs in the model's reply.
    """
    prompt = [
        {"role": "system", "content": "请严格按照要求回答,不要有多余解释。\n判断这两句话的语意是否相反,相反返回True,否则返回False。"},
        {"role": "user", "content": a+'\n'+b},
    ]
    response = client.chat.completions.create(model="large", messages=prompt)
    answer = response.choices[0].message.content
    print(answer)
    return 'True' in answer
|
|
|
|
|
|
2024-05-14 14:46:44 +00:00
|
|
|
|
def calc_similarity(scale):
    """Score every pair of items in a scale by embedding similarity.

    Each item's text is embedded with the ``text-embedding-3-large`` model,
    and the pairwise cosine similarity of the embeddings is computed. For
    every unordered pair, the two texts are also passed to ``liguistic``;
    when it reports that the sentences are opposite in meaning, the sign of
    the similarity is flipped and the pair is printed.

    Args:
        scale: mapping of item name to item text.

    Returns:
        list[dict]: one ``{"from", "to", "similarity"}`` entry per pair, where
        ``"from"`` is the item that comes earlier in ``scale`` and
        ``"similarity"`` is a plain ``float``. Empty when ``scale`` is empty.
    """
    # cosine_similarity rejects an input with zero samples, so an empty scale
    # is answered directly instead of crashing.
    if not scale:
        return []

    names = list(scale)
    # Other models previously tried here: nomic-embed-text, text-embedding-3-small.
    vectors = [
        client.embeddings.create(
            input=scale[name], model="text-embedding-3-large"
        ).data[0].embedding
        for name in names
    ]
    matrix = cosine_similarity(vectors)

    pairs = []
    for i, later in enumerate(names):
        # Only the lower triangle is visited: each pair is scored once.
        for j in range(i):
            earlier = names[j]
            # Plain float keeps the result independent of numpy scalar types.
            score = float(matrix[i][j])
            if liguistic(scale[later], scale[earlier]):
                score = -score
                print("Opposite:", scale[later], scale[earlier])
            pairs.append({"from": earlier, "to": later, "similarity": score})
    return pairs
|
|
|
|
|
|
|
|
|
|
def similarity(force:bool = False,sort:bool=True):
    """Return the item-pair similarities, using ``Temp/items.json`` as a cache.

    The pairs are recomputed with ``calc_similarity(batch())`` and written to
    the cache when ``force`` is true or when the cache file is missing or
    empty; otherwise they are loaded from the cache. A missing cache file or
    ``Temp`` directory no longer raises: the directory is created on demand.

    Args:
        force: recompute even if a non-empty cache file exists.
        sort: return the pairs ordered by descending ``"similarity"``.

    Returns:
        list[dict]: the pair records, sorted when ``sort`` is true.
    """
    cache_path = "Temp/items.json"

    if force or not os.path.isfile(cache_path) or os.path.getsize(cache_path) == 0:
        que = calc_similarity(batch())
        os.makedirs(os.path.dirname(cache_path), exist_ok=True)
        with open(cache_path, "w") as cache:
            cache.write(json.dumps(que))
    else:
        with open(cache_path, "r") as cache:
            que = json.load(cache)

    if sort:
        return sorted(que, key=lambda t: t["similarity"], reverse=True)
    return que
|
2024-05-14 09:38:35 +00:00
|
|
|
|
|
2024-05-14 14:46:44 +00:00
|
|
|
|
def make_data():
    """Write a random response table for the current items to ``data.csv``.

    The header row holds the item names returned by ``batch()``. It is
    followed by 1000 rows of random integers in the range 0..4, one value
    per item. Rows are sized from the header rather than fixed at 20 values,
    so the file stays a well-formed CSV for any number of items.

    NOTE(review): this writes ``data.csv`` in the working directory while
    ``correlation()`` reads ``Work/data.csv``; confirm which path is intended.
    """
    names = list(batch())
    lines = [",".join(names)]
    for _ in range(1000):
        lines.append(",".join(str(random.randint(0, 4)) for _ in names))

    with open("data.csv", "w") as data:
        data.write("\n".join(lines) + "\n")
|
2024-05-14 14:46:44 +00:00
|
|
|
|
|
2024-05-15 14:10:34 +00:00
|
|
|
|
def correlation():
    """Compute the pairwise correlation of the columns of ``Work/data.csv``.

    Each pair of distinct columns is correlated once with ``Series.corr``
    (pandas' default method) and stored under both ``(i, j)`` and ``(j, i)``.
    Pairs that cannot be correlated, such as non-numeric columns, are skipped.

    The reverse-item list is loaded and printed, but it is not currently
    applied to the result: the sign flip for reverse-scored items is disabled.

    Returns:
        dict: ``(column, column)`` tuples mapped to their correlation.
    """
    data = pandas.read_csv("Work/data.csv")
    rev = reverse_list()
    print(rev)

    que = {}
    columns = list(data.columns)
    for position, i in enumerate(columns):
        # Only pair each column with the ones before it; the mirrored key is
        # filled in below, so every pair is computed once.
        for j in columns[:position]:
            try:
                value = data[i].corr(data[j])
            except (TypeError, ValueError):
                # Best effort: skip columns pandas cannot correlate, without
                # hiding unrelated errors or KeyboardInterrupt.
                continue
            que[i, j] = value
            que[j, i] = value
    return que
|