# ItemStudy/file_load.py
#
# 124 lines
# 2.9 KiB
# Python
# Raw Permalink Normal View History

import json
import os
import random
import pandas
import numpy
# import matplotlib
from sklearn.metrics.pairwise import cosine_similarity
from openai import OpenAI
# Shared OpenAI-compatible client used by the functions in this module.
# Credentials are read from the environment so that no secret lives in the
# source tree; the key that used to be hard-coded here has been exposed and
# should be revoked. When OPENAI_API_KEY is unset, ``api_key`` is None and the
# client constructor reports the missing credential itself.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
    base_url=os.environ.get("OPENAI_BASE_URL", "http://154.9.28.247:3000/v1/"),
)
def batch():
    """Merge the ``"item"`` mappings of every scale file under ``Scales/``.

    Every entry of the ``Scales`` directory (relative to the current working
    directory) is parsed as JSON and the mapping stored under its ``"item"``
    key is merged into one dictionary.

    Returns:
        dict: All entries of every file's ``"item"`` mapping. When the same
        key occurs in several files, the file that sorts last by name wins.

    Raises:
        OSError: If ``Scales`` or one of its entries cannot be read.
        json.JSONDecodeError: If an entry is not valid JSON.
        KeyError: If a file has no ``"item"`` key.
    """
    merged = {}
    # os.listdir() returns names in arbitrary order; sorting makes both the
    # resulting key order and the duplicate-key winner reproducible.
    for file_name in sorted(os.listdir("Scales")):
        # Explicit UTF-8: the platform default encoding is locale dependent
        # and can mangle or reject non-ASCII item text.
        with open(os.path.join("Scales", file_name), "r", encoding="utf-8") as scale:
            merged.update(json.load(scale)["item"])
    return merged
# 2024-05-15 12:20:32 +00:00
def reverse_list():
    """Collect the ``"reverse"`` lists of every scale file under ``Scales/``.

    Every entry of the ``Scales`` directory (relative to the current working
    directory) is parsed as JSON and the list stored under its ``"reverse"``
    key is appended to the result.

    Returns:
        list: The concatenation of all ``"reverse"`` lists, in file-name
        order. Files without a ``"reverse"`` key contribute nothing.

    Raises:
        OSError: If ``Scales`` or one of its entries cannot be read.
        json.JSONDecodeError: If an entry is not valid JSON.
    """
    collected = []
    # Sorted for a reproducible result; os.listdir() order is arbitrary.
    for file_name in sorted(os.listdir("Scales")):
        with open(os.path.join("Scales", file_name), "r", encoding="utf-8") as scale:
            content = json.load(scale)
        # old_type() writes files that only contain an "item" key, so a
        # missing "reverse" entry must be tolerated rather than raise KeyError.
        collected.extend(content.get("reverse", []))
    return collected
def old_type(str):
    """Rewrite a legacy scale file in place using the ``{"item": {...}}`` layout.

    The legacy file is expected to hold a JSON sequence of objects that each
    carry a ``"name"`` and a ``"label"`` entry. The file is overwritten with a
    single object whose ``"item"`` mapping associates every name with its
    label.

    Args:
        str: Path of the file to convert. The parameter name shadows the
            builtin and is kept only so existing keyword callers keep working.

    Raises:
        OSError: If the file cannot be read or written.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError, TypeError: If an entry lacks ``"name"``/``"label"`` or is
            not a mapping. These are raised before the file is reopened for
            writing, so the original content is left untouched.
    """
    # Explicit UTF-8 keeps non-ASCII labels intact regardless of the locale.
    with open(str, "r", encoding="utf-8") as file:
        legacy = json.load(file)
    converted = {"item": {entry["name"]: entry["label"] for entry in legacy}}
    with open(str, "w", encoding="utf-8") as file:
        file.write(json.dumps(converted))
def liguistic(a:str,b:str):
    """Ask the chat model whether two sentences have opposite meanings.

    Both sentences are sent, separated by a newline, to the ``"large"`` chat
    model through the module-level ``client``. The (Chinese) system prompt
    instructs the model to answer ``True`` when the meanings are opposite and
    ``False`` otherwise. The raw reply is printed.

    Args:
        a: First sentence.
        b: Second sentence.

    Returns:
        bool: ``True`` when the reply text contains the substring ``"True"``.
        A reply without text content counts as ``False``.
    """
    completion = client.chat.completions.create(
        model="large",
        messages=[
            {"role": "system", "content": "请严格按照要求回答,不要有多余解释。\n判断这两句话的语意是否相反相反返回True否则返回False。"},
            {"role": "user", "content": a+'\n'+b},
        ],
    )
    reply = completion.choices[0].message.content
    print(reply)
    # The API may return a message without text content (None); testing
    # membership on None would raise TypeError.
    return 'True' in (reply or "")
def calc_similarity(scale):
    """Compute a signed pairwise similarity for every pair of scale items.

    Each item text is embedded with ``text-embedding-3-large`` through the
    module-level ``client``; the cosine similarity of every pair is then
    computed. For each pair ``liguistic`` is asked whether the two texts are
    opposite in meaning, and if so the similarity is negated and the pair is
    printed.

    Args:
        scale: Mapping from item name to item text.

    Returns:
        list[dict]: One ``{"from", "to", "similarity"}`` record per unordered
        pair, where ``"from"`` is the item that comes earlier in ``scale`` and
        ``"to"`` the later one. Empty when ``scale`` has fewer than two items.
    """
    # Fewer than two items yields no pairs; returning early avoids pointless
    # embedding requests and the error cosine_similarity raises on empty input.
    if len(scale) < 2:
        return []

    names = list(scale)
    vectors = [
        client.embeddings.create(
            input=scale[name], model="text-embedding-3-large"  # nomic-embed-text text-embedding-3-small
        ).data[0].embedding
        for name in names
    ]
    matrix = cosine_similarity(vectors)

    pairs = []
    for i, later in enumerate(names):
        # Only the lower triangle is visited so each pair appears once.
        for j in range(i):
            earlier = names[j]
            # Plain float keeps the records JSON-serialisable independent of
            # the numpy scalar type of the matrix.
            score = float(matrix[i][j])
            if liguistic(scale[later], scale[earlier]):
                score = -score
                print("Opposite:", scale[later], scale[earlier])
            pairs.append({"from": earlier, "to": later, "similarity": score})
    return pairs
def similarity(force:bool = False,sort:bool=True):
    """Return the pairwise item similarities, cached in ``Temp/items.json``.

    The similarities are recomputed with ``calc_similarity(batch())`` and
    written to the cache when ``force`` is true or when the cache file is
    missing or empty; otherwise they are loaded from the cache.

    Args:
        force: Recompute even if a non-empty cache file exists.
        sort: Return the records ordered by ``"similarity"``, highest first.

    Returns:
        list[dict]: The similarity records, sorted when ``sort`` is true and
        in stored order otherwise.
    """
    cache_path = "Temp/items.json"
    # getsize() raises FileNotFoundError on a missing file, so existence has
    # to be checked first; a missing cache simply means "compute it".
    cache_usable = os.path.isfile(cache_path) and os.path.getsize(cache_path) > 0

    if force or not cache_usable:
        que = calc_similarity(batch())
        os.makedirs("Temp", exist_ok=True)
        with open(cache_path, "w", encoding="utf-8") as items:
            items.write(json.dumps(que))
    else:
        with open(cache_path, "r", encoding="utf-8") as items:
            que = json.load(items)

    if sort:
        return sorted(que, key=lambda t: t["similarity"], reverse=True)
    return que
def make_data(rows: int = 1000):
    """Write a CSV of random responses for the items returned by ``batch()``.

    The first line of ``test.csv`` (in the current working directory) lists
    the item names; every following line holds one random integer between 0
    and 4 (inclusive) per item.

    Args:
        rows: Number of response rows to generate. Defaults to 1000.
    """
    # NOTE(review): correlation() reads "Work/test.csv" while this writes
    # "test.csv" in the working directory; confirm which location is intended.
    columns = list(batch())
    lines = [",".join(columns)]
    for _ in range(rows):
        # One value per header column; a fixed count of 20 only lines up with
        # the header when there happen to be exactly 20 items.
        lines.append(",".join(str(random.randint(0, 4)) for _ in columns))
    with open("test.csv", "w", encoding="utf-8") as data:
        data.write("\n".join(lines) + "\n")
# 2024-05-15 14:10:34 +00:00
def correlation():
    """Compute the pairwise correlation between all columns of the data file.

    ``Work/test.csv`` is loaded if it can be read and parsed; otherwise
    ``Work/data.csv`` is used. The list returned by ``reverse_list()`` is
    printed. For every pair of distinct columns the correlation of the two
    series is stored under both key orders.

    Returns:
        dict: Maps ``(column_a, column_b)`` tuples to the correlation of the
        two columns. Pairs whose correlation cannot be computed are omitted.
    """
    try:
        data = pandas.read_csv("Work/test.csv")
    except (OSError, ValueError):
        # Missing/unreadable file (OSError) or empty/unparsable content
        # (pandas parsing errors derive from ValueError): use the real data.
        data = pandas.read_csv("Work/data.csv")

    rev = reverse_list()
    print(rev)
    # NOTE: flipping the sign for reverse-scored items listed in ``rev`` is
    # currently not applied; ``rev`` is only printed.

    columns = list(data.columns)
    que = {}
    for position, i in enumerate(columns):
        # Visit only the columns before ``i`` and mirror the result, so each
        # unordered pair is computed once.
        for j in columns[:position]:
            try:
                value = data[i].corr(data[j])
            except (TypeError, ValueError):
                # Columns that cannot be treated as numeric are skipped.
                continue
            que[i, j] = value
            que[j, i] = value
    return que