Cyber-Risk-Management-System-with-RAG-based-Analysis/backend_Models.py at master · anticoding333/Cyber-Risk-Management-System-with-RAG-based-Analysis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
import os


from dotenv import load_dotenv
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain.chat_models import init_chat_model
from langchain_core.messages import HumanMessage, SystemMessage
from langchain.chains.history_aware_retriever import create_history_aware_retriever
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

def get_prompt(answers) -> str:
      q1, q2, q3, q4, q5, q6, q7, q8, q9, q10 = answers[:10]
      promot= f"""
 Organization Name: {q1}
• Primary Sector: {q2}
• Employee Count: {q3}
• Handles Sensitive Personal Data: {q4}
• Partners with Government Agencies: {q5}
• Remote Workforce: {q6}
• Documented Cybersecurity Policies: {q7}
• Regular Security Awareness Training: {q8}
• Stores or Processes Financial Information: {q9}
• History of Cybersecurity Incidents: {q10}
"""
      return promot,q2

def dynamic_prompt(chat_history: list) -> str:
    QA = []
    for i in range(0, len(chat_history), 2):
        if i+1 < len(chat_history):
            q = chat_history[i].content
            a = chat_history[i+1].content
            QA.append({"AI question": q, "Organization answer": a})

    AI_Q = "\n\n".join([f"Q: {item['AI question']}\nA: {item['Organization answer']}" for item in QA])
    return AI_Q


def Questions_chat(info:str,text:str,sector:str,chat_history:list) -> str:

        load_dotenv()

        if sector == "healthcare":
            vector_id = "chroma_Health_db"
        elif sector == "banking":
            vector_id = "chroma_Banking_db"

        currt_dir = os.path.dirname(os.path.abspath(__file__))
        persist_directory = os.path.join(currt_dir, "db",vector_id)

        embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

        retriever= db.as_retriever(search_type="similarity",search_kwargs={"k": 5})

        llm = init_chat_model(model="gpt-4o")

        Cont_q_system_prompt="""
        Given the chat history and the latest user Answer,
        which might reference context in the chat history,
        generate a strandalone new question which can be understood and not related to the previous question nor the user answer.
        without the chat history. Do NOT repeat the question, just
        generate a new question. Do NOT use the user answer to generate the question. the chat history
        is provide as context to generate a new question no matter what the user answer to the previous question.
        If the user answer is not related to the question, never ask for clarification, just generate a new question.
        """

        cont_q_prompt=ChatPromptTemplate.from_messages(
            [
                ("system", Cont_q_system_prompt),
                MessagesPlaceholder(variable_name="chat_history"),
                ("human", "{input}"),
            ]

        )

        history_aware_retriever = create_history_aware_retriever(
            llm,retriever,cont_q_prompt
        )

        qa_sytem_prompt = f"""
        you are A Cyber Risk Analyst and you are going to ask a business, website or company owners a question about cyber risk management related to there organization.
        Use the following piesce on retrieved context and the organization info:{info}to ask the question.
        Use one sentences at most and keep the queston professional concise\n\n{{context}}.
        If the user answer is not related to the question, never ask for clarification, just generate a new question.
        When generating the question, never ever consider the user answer to generate the next question.
        the questio must not be related to the previous question or the user answer.
        """

        qa_prompt=ChatPromptTemplate.from_messages(
            [
                ("system", qa_sytem_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}")
            ]
        )

        question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

        rag= create_retrieval_chain(history_aware_retriever, question_answer_chain)


        result = rag.invoke(
                {
                "input": text,
                "chat_history": chat_history})


        answer = result["answer"]
        return answer


def Risk_Manager(ST:str,SR:str,SQ:str,DQ:str) -> str:


        load_dotenv()

        currt_dir = os.path.dirname(os.path.abspath(__file__))
        persist_directory = os.path.join(currt_dir, "db","chroma_rules_db")

        embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
        llm = init_chat_model(model="gpt-4o")

        fix_prices="""
        Developer time time to fix a risk: 150JOD to 1200JOD
        License cost for a security tool: 400JOD to 1500JOD
        Cybersecurity training for employees: 1000JOD to 2500JOD per employee
"""

        NOTfix_prices="""
        Legal penalties for data breaches: 10000JOD to 70000JOD
        Loss of customer trust and potential revenue loss: 5000JOD to 50000JOD
        Breach recovery costs (forensics, legal fees, etc.): 2000JOD to 10000JOD
"""

        query = f"""
Consdering the following inputs:
AI Analyst Q&A: {DQ}
Scan Results: {SR}
Scan Type: {ST}
Organization information {SQ}

You are a cybersecurity risk management expert. Using the inputs below, produce a full Cyber Risk Management Plan structured exactly as follows (omit all examples):

1. **Listing All Risks**
   – Identify and list each risk discovered, drawing both from the scan results and from the AI-analyst Q&A.
 a. AI Analyst Q&A
 b. Scan Results
2. **Risk Classification**
   – For each risk, assign:
     • Category (Technical, Organizational, Legal, Physical)
     • Source (Human error, System flaw, External attack, Missing policy)
     • (Optionally) Relevant standard mapping (mainly use 'Jordanian cyber crime Company related laws' for mapping
      if not compatible then use other relevant standards) then mention from where you took each standard

3. **Risk Impact Assessment**
   – For each risk, assess:
     • Likelihood (Low = 1, Medium = 2, High = 3)
     • Impact (Low = 1, Medium = 2, High = 3)
     • Compute Risk Score = Likelihood × Impact and label it (1–3 = Low, 4 = Medium, 6+ = High)
     ** in the end SUM the risk score for all risks and label it as Low, Medium or High**

     **here calculate the compliance score = 100 - (Total Risk Score / (Number of Risk * Highest Risk Score) * 100 )  **

4. **Risk Prioritization (Matrix)**
   – Present a 3×3 matrix (Likelihood vs. Impact) and place each risk in its cell.
   - away form the matric as a table.


5. **Recommended Risk Treatment**
   – For each risk, recommend one of: Avoid, Reduce, Transfer, Accept.

6. **Business Case (Cost Analysis)**
   – For each major/high-priority risk calculate and compare:
     • Cost to Fix (Treatment Cost)
       **– Use the following cost ranges:{fix_prices}**
     • Cost If Not Treated (Breach/Loss Cost)
       **– Use the following cost ranges:{NOTfix_prices}**
   – Conclude treatment recommendation based on cost comparison.


"""

        retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 5})

        relevant_docs = retriever.invoke(query)

        combined_input= (""
        + query
        + "\n\n Relevant documents:\n"
        + "\n\n".join([doc.page_content for doc in relevant_docs])
        + "\n\n please answer the question based on the provided documents.\n\n"
        )


        message=[
            SystemMessage(content="You are Jordanian Legal Consultant"),
            HumanMessage(content=combined_input)

        ]

        response = llm.invoke(message)

        return response.content