ItemStudy/file_load.py
2024-05-16 19:38:14 +08:00

124 lines
2.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import os
import random
import pandas
import numpy
# import matplotlib
from sklearn.metrics.pairwise import cosine_similarity
from openai import OpenAI
# NOTE(review): the credential and endpoint below are hard-coded and therefore
# live in version control; the key should be rotated and supplied through the
# environment. Project-specific variable names are used on purpose so that a
# generic OPENAI_API_KEY is never sent to this custom endpoint by accident.
# The literals remain only as fallbacks so existing setups keep working.
client = OpenAI(
    api_key=os.environ.get(
        "ITEMSTUDY_API_KEY",
        "sk-PRJ811XeKzEy20Ug3dA98a34Af8b40B5816dE15503D33599",
    ),
    base_url=os.environ.get(
        "ITEMSTUDY_BASE_URL",
        "http://154.9.28.247:3000/v1/",
    ),
)
def batch():
    """Collect the items of every scale file in the ``Scales`` directory.

    Each file is parsed as JSON and the entries of its ``"item"`` mapping are
    merged into one dictionary. If two files define the same key, the file
    listed later by ``os.listdir`` wins.

    Returns:
        dict: the merged ``"item"`` entries of all scale files.

    Raises:
        KeyError: if a scale file has no ``"item"`` key.
    """
    items = {}
    for filename in os.listdir("Scales"):
        # JSON is UTF-8 by specification; do not depend on the platform's
        # locale encoding (which is not UTF-8 on many Windows setups).
        with open(os.path.join("Scales", filename), "r", encoding="utf-8") as scale:
            tmp = json.load(scale)
        items.update(tmp["item"])
    return items
def reverse_list():
    """Collect the ``"reverse"`` entries of every scale file in ``Scales``.

    Each file is parsed as JSON and its ``"reverse"`` list is appended to the
    result. A file without a ``"reverse"`` key contributes nothing; this is
    the shape ``old_type()`` writes (only an ``"item"`` key), so such files
    must not abort the whole scan.

    Returns:
        list: concatenation of all ``"reverse"`` lists, in ``os.listdir``
        order.
    """
    items = []
    for filename in os.listdir("Scales"):
        # Read as UTF-8 explicitly instead of relying on the locale encoding.
        with open(os.path.join("Scales", filename), "r", encoding="utf-8") as scale:
            tmp = json.load(scale)
        items.extend(tmp.get("reverse", []))
    return items
def old_type(str):
    """Convert an old-format scale file to the current format, in place.

    The old format is a JSON list of objects carrying ``"name"`` and
    ``"label"``. The file is overwritten with ``{"item": {name: label, ...}}``.
    The file is only reopened for writing after the conversion succeeded, so
    a malformed input leaves it untouched.

    Args:
        str: path of the scale file to convert. (The name shadows the builtin
            but is kept so keyword callers keep working.)

    Raises:
        TypeError, KeyError: if the file content is not a list of objects
            with ``"name"`` and ``"label"`` keys.
    """
    path = str
    # Read as UTF-8 explicitly instead of relying on the locale encoding.
    with open(path, "r", encoding="utf-8") as file:
        scale = json.load(file)
    new = {"item": {entry["name"]: entry["label"] for entry in scale}}
    # json.dumps escapes non-ASCII characters by default, so the written
    # bytes are plain ASCII.
    with open(path, "w", encoding="utf-8") as file:
        file.write(json.dumps(new))
def liguistic(a:str,b:str):
    """Ask the chat model whether two sentences have opposite meanings.

    The system prompt (in Chinese) instructs the model to answer strictly,
    without extra explanation, returning True when the two sentences are
    opposite in meaning and False otherwise. The raw answer is printed.

    Args:
        a: first sentence.
        b: second sentence.

    Returns:
        bool: True when the model's answer contains the substring ``'True'``.
    """
    completion = client.chat.completions.create(
        model="large",
        messages=[
            {"role": "system", "content": "请严格按照要求回答,不要有多余解释。\n判断这两句话的语意是否相反相反返回True否则返回False。"},
            {"role": "user", "content": a+'\n'+b}
        ]
    )
    answer = completion.choices[0].message.content
    print(answer)
    # The API allows a null message content; treat that as "not opposite"
    # instead of failing with TypeError on the membership test.
    return 'True' in (answer or "")
def calc_similarity(scale):
    """Compute a signed similarity for every unordered pair of scale items.

    Every item text is embedded with ``text-embedding-3-large`` and the
    cosine-similarity matrix of the embeddings is built. For each pair, the
    chat-model check ``liguistic`` is consulted; when it reports the two
    texts as opposite, the pair's similarity is multiplied by -1 and the pair
    is printed.

    Args:
        scale: mapping of item key to item text.

    Returns:
        list: one ``{"from", "to", "similarity"}`` dict per pair, where
        ``"from"`` is the key that comes earlier in ``scale``.
    """
    names = list(scale)
    embeddings = [
        client.embeddings.create(
            input=scale[name], model="text-embedding-3-large"  # nomic-embed-text text-embedding-3-small
        ).data[0].embedding
        for name in names
    ]
    matrix = cosine_similarity(embeddings)

    pairs = []
    for row in range(len(matrix)):
        later = names[row]
        # Only the lower triangle is visited, so each pair appears once.
        for col in range(row):
            earlier = names[col]
            if liguistic(scale[later], scale[earlier]):
                matrix[row][col] *= -1
                print("Opposite:", scale[later], scale[earlier])
            pairs.append({
                "from": earlier,
                "to": later,
                "similarity": matrix[row][col],
            })
    return pairs
def similarity(force:bool = False,sort:bool=True):
    """Return the pairwise item similarities, cached in ``Temp/items.json``.

    The pairs are recomputed with ``calc_similarity(batch())`` and written to
    the cache when ``force`` is set, or when the cache file is missing or
    empty. Otherwise they are loaded from the cache.

    Args:
        force: recompute even if a non-empty cache file exists.
        sort: when true, return the pairs ordered by descending
            ``"similarity"``; otherwise return them in stored order.

    Returns:
        list: dicts with ``"from"``, ``"to"`` and ``"similarity"`` keys.
    """
    cache_path = "Temp/items.json"
    # os.path.getsize raises for a missing file, so a first run without an
    # existing cache must be detected explicitly.
    stale = (
        force
        or not os.path.exists(cache_path)
        or os.path.getsize(cache_path) == 0
    )
    if stale:
        que = calc_similarity(batch())
        # The cache directory may not exist yet on a fresh checkout.
        os.makedirs(os.path.dirname(cache_path), exist_ok=True)
        with open(cache_path, "w") as items:
            items.write(json.dumps(que))
    else:
        with open(cache_path, "r") as items:
            que = json.load(items)
    if sort:
        return sorted(que, key=lambda t: t["similarity"], reverse=True)
    return que
def make_data(rows: int = 1000, path: str = "test.csv"):
    """Write a CSV of random answers for the items returned by ``batch()``.

    The header row holds the item keys; every following row holds one random
    integer in ``[0, 4]`` per item. The number of values per row follows the
    number of items, so header and data always have the same column count
    (previously rows were fixed at 20 values).

    Args:
        rows: number of data rows to generate.
        path: output file path.
    """
    names = list(batch())
    lines = [",".join(names)]
    for _ in range(rows):
        lines.append(",".join(str(random.randint(0, 4)) for _ in names))
    # Item keys may contain non-ASCII characters; write UTF-8 explicitly,
    # which is also what pandas.read_csv expects by default.
    with open(path, "w", encoding="utf-8") as data:
        data.write("\n".join(lines) + "\n")
def correlation():
    """Compute the pairwise correlation between all columns of the data set.

    ``Work/test.csv`` is loaded if possible, otherwise ``Work/data.csv``.
    The list from ``reverse_list()`` is printed. For every pair of distinct
    columns, ``Series.corr`` is evaluated once and stored under both key
    orders; pairs whose correlation cannot be computed are skipped.

    Returns:
        dict: maps ``(column_a, column_b)`` tuples to the correlation value.
    """
    try:
        data = pandas.read_csv("Work/test.csv")
    except Exception:
        # Best-effort fallback to the real data set; Exception (not a bare
        # except) so Ctrl-C and interpreter exit are not swallowed.
        data = pandas.read_csv("Work/data.csv")
    rev = reverse_list()
    print(rev)
    # NOTE(review): sign-flipping of reverse-scored items (entries of `rev`)
    # is currently disabled; `rev` is only printed.
    que = {}
    columns = list(data)
    for pos, i in enumerate(columns):
        # Only columns before `i` are paired with it; the mirrored key is
        # filled in below, so each correlation is computed once.
        for j in columns[:pos]:
            try:
                value = data[i].corr(data[j])
            except Exception:
                # Columns that cannot be correlated are deliberately skipped.
                continue
            que[i, j] = value
            que[j, i] = value
    return que