From 727d4e57a87c2f03e3572e21a7ac9ccfb8f87698 Mon Sep 17 00:00:00 2001
From: mxr612
Date: Tue, 14 May 2024 04:50:01 +0800
Subject: [PATCH] sorted similarity of item link

---
 ItemRelate.py      | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 TestFile/adhd.json | 36 ++++++++++++++++++++++++++++++++++++
 embedding.py       | 25 -------------------------
 main.py            | 11 +++++++++++
 4 files changed, 111 insertions(+), 25 deletions(-)
 create mode 100644 ItemRelate.py
 create mode 100644 TestFile/adhd.json
 delete mode 100644 embedding.py
 create mode 100644 main.py

diff --git a/ItemRelate.py b/ItemRelate.py
new file mode 100644
index 0000000..e44b7c0
--- /dev/null
+++ b/ItemRelate.py
@@ -0,0 +1,64 @@
+"""Rank pairs of questionnaire items by the cosine similarity of their embeddings."""
+import os
+# SECURITY(review): a live API key is hard-coded and committed here. Rotate it
+# and load it from the environment or a secrets store instead.
+os.environ["OPENAI_API_KEY"]= "sk-PRJ811XeKzEy20Ug3dA98a34Af8b40B5816dE15503D33599"
+# NOTE(review): plain-HTTP endpoint, so the key above is sent unencrypted.
+os.environ["OPENAI_BASE_URL"]= "http://154.9.28.247:3000/v1/"
+
+from openai import OpenAI
+client = OpenAI()
+
+from sklearn.metrics.pairwise import cosine_similarity
+
+import json
+
+def embedding(s:str):
+    """Return the embedding vector of ``s``, or ``None`` when ``s`` is empty."""
+    if not s:
+        return None
+    # Alternative model noted by the author: nomic-embed-text.
+    response = client.embeddings.create(input=s, model="text-embedding-3-small")
+    return response.data[0].embedding
+
+class edge:
+    """A pair of item ids (``a``, ``b``) with their similarity score ``sim``."""
+    a:str
+    b:str
+    sim:float
+    def __init__(self, a:str, b:str, sim:float):
+        self.a=a
+        self.b=b
+        self.sim=sim
+
+def main(scale:dict):
+    """Return one ``edge`` per unordered item pair, sorted by ascending similarity.
+
+    ``scale["item"]`` maps item ids to item texts. Each text is embedded with
+    ``embedding`` and every pair of items is scored by cosine similarity.
+
+    Raises ``ValueError`` when an item text is empty, since it has no embedding.
+    """
+    item=list(scale["item"])
+    # Fewer than two items means there are no pairs; skip the embedding calls.
+    if len(item) < 2:
+        return []
+
+    vec=[]
+    for i in item:
+        v=embedding(scale["item"][i])
+        if v is None:
+            raise ValueError(f"item {i!r} has empty text and cannot be embedded")
+        vec.append(v)
+
+    simi=cosine_similarity(vec)
+
+    que=[]
+    for i in range(len(item)):
+        for j in range(i):
+            que.append(edge(item[i], item[j], simi[i][j]))
+
+    # list.sort() sorts in place; the earlier sorted() call discarded its result,
+    # so the edges were returned unsorted.
+    que.sort(key=lambda t: t.sim)
+    return que
\ No newline at end of file
diff --git a/TestFile/adhd.json b/TestFile/adhd.json
new file mode 100644
index 0000000..117c728
--- /dev/null
+++ b/TestFile/adhd.json
@@ -0,0 +1,36 @@
+{
+    "name":"成人多动症自筛表 (6 itmes)",
+    "introduction":1,
+    "item":{
+        "asrs1": "当一项工作最有挑战性的部分完成后,你经常难以做到对细节部分更精益求精吗?",
+        "asrs2": "你经常很难把有组织性的任务、事情布置得井井有条吗? ",
+        "asrs3": "当你从事一项需要脑力的工作的时候,你经常回避或者延长开始的时间吗?",
+        "asrs4": "你经常很难记住约会或者一些必须完成的事情吗?",
+        "asrs5": "当您必须安静地坐很长时间时,是否经常表现得坐立不安,手脚动作多?",
+        "asrs6": "你是否在自觉做事情时表现得极度活跃,非完成某项事情不可,好像在被发动机所驱使一样?"
+    },
+    "value": {
+        "从不":0,
+        "很少":1,
+        "有时":2,
+        "经常":3,
+        "总是":4
+    },
+    "result":[
+        {
+            "name": "ADHD",
+            "sum": [
+                "asrs1",
+                "asrs2",
+                "asrs3",
+                "asrs4",
+                "asrs5",
+                "asrs6"
+            ],
+            "re_sum": [],
+            "result": {
+                "0": 2
+            }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/embedding.py b/embedding.py
deleted file mode 100644
index 5306bfd..0000000
--- a/embedding.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import os
-os.environ["OPENAI_API_KEY"]= "sk-PRJ811XeKzEy20Ug3dA98a34Af8b40B5816dE15503D33599"
-os.environ["OPENAI_BASE_URL"]= "http://154.9.28.247:3000/v1/"
-
-from openai import OpenAI
-client = OpenAI()
-
-from sklearn.metrics.pairwise import cosine_similarity
-
-def embedding(s:str):
-    if len(s)==0:
-        return
-    else:
-        return client.embeddings.create(
-            input=s, model="text-embedding-3-large" # nomic-embed-text text-embedding-3-small
-        ).data[0].embedding
-
-a=embedding("I tend to draw fine distinctions between similar feelings (e.g., depressed and blue; annoyed and irritated).")
-# b=embedding("我喜欢界定两种相似的情绪(如沮丧和忧伤,烦恼和被激怒)。")
-# c=embedding("I like to define two similar emotions (e.g., frustration and sadness, annoyance and irritation).")
-
-print(a,"\n",len(a))
-
-# s = cosine_similarity([a, b, c])
-# print(s)
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..20b89bd
--- /dev/null
+++ b/main.py
@@ -0,0 +1,11 @@
+from ItemRelate import main as relate
+import json
+
+# The scale file holds non-ASCII text, so decode it explicitly as UTF-8.
+with open("TestFile/adhd.json", encoding="utf-8") as json_file:
+    scale = json.load(json_file)
+
+que=relate(scale)
+
+for i in que:
+    print(i.a, i.b, i.sim)