Skip to content

Commit f0b60b8

Browse files
committed
Merge branch 'update/document_module' into feature/doc-transform
2 parents e929370 + c388423 commit f0b60b8

26 files changed

Lines changed: 399 additions & 183 deletions
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
"""Add is_deleted column in document
2+
3+
Revision ID: 42c4c8b22a09
4+
Revises: 38f0e8c8dc92
5+
Create Date: 2025-08-22 15:16:29.489991
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
import sqlmodel.sql.sqltypes
11+
12+
13+
# revision identifiers, used by Alembic.
14+
revision = "42c4c8b22a09"
15+
down_revision = "38f0e8c8dc92"
16+
branch_labels = None
17+
depends_on = None
18+
19+
20+
def upgrade():
    """Re-key ``document`` from a user owner to a project and add soft delete.

    Adds ``project_id`` and ``is_deleted`` to ``document``, replaces the
    ``owner_id`` foreign key with one pointing at ``project.id`` and then
    drops ``owner_id``.
    """
    # NOTE(review): project_id is NOT NULL with no default, so this statement
    # fails on a database that already contains document rows. The mapping
    # from the old owner_id to a project is not visible from this migration;
    # before running against live data, add the column as nullable, backfill
    # it, and only then tighten it to NOT NULL.
    op.add_column("document", sa.Column("project_id", sa.Integer(), nullable=False))

    # A temporary server default lets existing rows take FALSE while the
    # NOT NULL column is created; it is removed right afterwards so the final
    # schema is a plain NOT NULL boolean, as originally intended.
    op.add_column(
        "document",
        sa.Column(
            "is_deleted",
            sa.Boolean(),
            nullable=False,
            server_default=sa.false(),
        ),
    )
    op.alter_column("document", "is_deleted", server_default=None)

    op.drop_constraint("document_owner_id_fkey", "document", type_="foreignkey")
    # Name the constraint explicitly: a later DROP CONSTRAINT needs a name.
    # The name follows the same <table>_<column>_fkey pattern as the
    # constraint dropped above.
    op.create_foreign_key(
        "document_project_id_fkey",
        "document",
        "project",
        ["project_id"],
        ["id"],
        ondelete="CASCADE",
    )
    op.drop_column("document", "owner_id")
35+
36+
37+
def downgrade():
    """Restore ``document.owner_id`` and remove ``project_id`` / ``is_deleted``.

    Re-adds ``owner_id`` with its foreign key to ``user.id``, drops the
    foreign key on ``project_id`` and then drops both columns added by
    ``upgrade``.
    """
    # NOTE(review): owner_id is re-added as NOT NULL with no default, so this
    # fails if document still contains rows; the original owners cannot be
    # recovered from this migration alone.
    op.add_column(
        "document",
        sa.Column("owner_id", sa.INTEGER(), autoincrement=False, nullable=False),
    )
    # The constraint must be referenced by name; passing None cannot produce a
    # DROP CONSTRAINT statement. "document_project_id_fkey" assumes the
    # <table>_<column>_fkey naming that "document_owner_id_fkey" also follows
    # -- TODO confirm against the target database.
    op.drop_constraint("document_project_id_fkey", "document", type_="foreignkey")
    op.create_foreign_key(
        "document_owner_id_fkey",
        "document",
        "user",
        ["owner_id"],
        ["id"],
        ondelete="CASCADE",
    )
    op.drop_column("document", "is_deleted")
    op.drop_column("document", "project_id")
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
"""Add storage_path column in project table
2+
3+
Revision ID: b79fc198879a
4+
Revises: 42c4c8b22a09
5+
Create Date: 2025-08-22 16:23:28.162616
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
import sqlmodel.sql.sqltypes
11+
12+
13+
# revision identifiers, used by Alembic.
14+
revision = "b79fc198879a"
15+
down_revision = "42c4c8b22a09"
16+
branch_labels = None
17+
depends_on = None
18+
19+
20+
def upgrade():
    """Add a unique, non-null ``storage_path`` UUID to every ``project`` row.

    The column is created nullable, each existing project is given its own
    freshly generated UUID, and only then are NOT NULL and the unique
    constraint applied. Adding the column directly as NOT NULL fails on a
    table that already has rows, and a single constant default would violate
    the unique constraint.
    """
    import uuid

    op.add_column("project", sa.Column("storage_path", sa.Uuid(), nullable=True))

    # Lightweight table stub: migrations should not import application models.
    project = sa.table(
        "project",
        sa.column("id"),
        sa.column("storage_path", sa.Uuid()),
    )
    bind = op.get_bind()
    # Materialise the ids first so no result cursor is open during the updates.
    existing_ids = bind.execute(sa.select(project.c.id)).scalars().all()
    for existing_id in existing_ids:
        bind.execute(
            sa.update(project)
            .where(project.c.id == existing_id)
            .values(storage_path=uuid.uuid4())
        )

    op.alter_column(
        "project", "storage_path", existing_type=sa.Uuid(), nullable=False
    )
    # Explicit name so the constraint can be dropped by name on downgrade.
    op.create_unique_constraint(
        "project_storage_path_key", "project", ["storage_path"]
    )
25+
26+
27+
def downgrade():
    """Remove the ``storage_path`` column and its unique constraint from ``project``."""
    # The constraint must be referenced by name; passing None cannot produce a
    # DROP CONSTRAINT statement. "project_storage_path_key" assumes the
    # <table>_<column>_key name PostgreSQL gives an unnamed unique constraint
    # -- TODO confirm against the target database.
    op.drop_constraint("project_storage_path_key", "project", type_="unique")
    op.drop_column("project", "storage_path")

backend/app/api/routes/collections.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
DocumentCollectionCrud,
2525
)
2626
from app.crud.rag import OpenAIVectorStoreCrud, OpenAIAssistantCrud
27-
from app.models import Collection, Document
27+
from app.models import Collection, Document, DocumentPublic
2828
from app.models.collection import CollectionStatus
2929
from app.utils import APIResponse, load_description, get_openai_client
3030

@@ -225,8 +225,8 @@ def do_create_collection(
225225
else WebHookCallback(request.callback_url, payload)
226226
)
227227

228-
storage = AmazonCloudStorage(current_user)
229-
document_crud = DocumentCrud(session, current_user.id)
228+
storage = AmazonCloudStorage(current_user.project_id)
229+
document_crud = DocumentCrud(session, current_user.project_id)
230230
assistant_crud = OpenAIAssistantCrud(client)
231231
vector_store_crud = OpenAIVectorStoreCrud(client)
232232
collection_crud = CollectionCrud(session, current_user.id)
@@ -423,7 +423,7 @@ def list_collections(
423423
@router.post(
424424
"/docs/{collection_id}",
425425
description=load_description("collections/docs.md"),
426-
response_model=APIResponse[List[Document]],
426+
response_model=APIResponse[List[DocumentPublic]],
427427
)
428428
def collection_documents(
429429
session: SessionDep,

backend/app/api/routes/documents.py

Lines changed: 45 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
from typing import List, Optional
44
from pathlib import Path
55

6-
from fastapi import APIRouter, File, UploadFile, Query, Form, BackgroundTasks
6+
from fastapi import APIRouter, File, UploadFile, Query, Form, BackgroundTasks, HTTPException
77
from fastapi import Path as FastPath
88
from fastapi.responses import JSONResponse
99
from fastapi import HTTPException
1010

11-
from app.crud import DocumentCrud, CollectionCrud
12-
from app.models import Document
11+
from app.crud import DocumentCrud, CollectionCrud, get_project_by_id
12+
from app.models import Document, DocumentPublic, Message
1313
from app.utils import APIResponse, load_description, get_openai_client
1414
from app.api.deps import CurrentUser, SessionDep, CurrentUserOrgProject
1515
from app.core.cloud import AmazonCloudStorage
@@ -29,27 +29,27 @@
2929
@router.get(
3030
"/list",
3131
description=load_description("documents/list.md"),
32-
response_model=APIResponse[List[Document]],
32+
response_model=APIResponse[List[DocumentPublic]],
3333
)
3434
def list_docs(
3535
session: SessionDep,
36-
current_user: CurrentUser,
36+
current_user: CurrentUserOrgProject,
3737
skip: int = Query(0, ge=0),
3838
limit: int = Query(100, gt=0, le=100),
3939
):
40-
crud = DocumentCrud(session, current_user.id)
40+
crud = DocumentCrud(session, current_user.project_id)
4141
data = crud.read_many(skip, limit)
4242
return APIResponse.success_response(data)
4343

4444

4545
@router.post(
4646
"/upload",
4747
description=load_description("documents/upload.md"),
48-
response_model=APIResponse[Document],
48+
response_model=APIResponse[DocumentPublic],
4949
)
5050
async def upload_doc(
5151
session: SessionDep,
52-
current_user: CurrentUser,
52+
current_user: CurrentUserOrgProject,
5353
src: UploadFile = File(...),
5454
background_tasks: BackgroundTasks = None,
5555
target_format: Optional[str] = Form(None),
@@ -62,10 +62,16 @@ async def upload_doc(
6262
raise HTTPException(status_code=400, detail=str(e))
6363

6464
# Upload the original document first
65-
storage = AmazonCloudStorage(current_user)
65+
storage = AmazonCloudStorage(current_user.project_id)
6666
document_id = uuid4()
67-
object_store_url = storage.put(src, Path(str(document_id)))
68-
crud = DocumentCrud(session, current_user.id)
67+
68+
project = get_project_by_id(session=session, project_id=current_user.project_id)
69+
if project is None:
70+
raise HTTPException(404, "Project not found")
71+
72+
key = Path(str(project.storage_path), str(document_id))
73+
object_store_url = storage.put(src, key)
74+
crud = DocumentCrud(session, current_user.project_id)
6975
document = Document(
7076
id=document_id,
7177
fname=src.filename,
@@ -123,10 +129,10 @@ async def upload_doc(
123129
)
124130

125131

126-
@router.get(
132+
@router.delete(
127133
"/remove/{doc_id}",
128134
description=load_description("documents/delete.md"),
129-
response_model=APIResponse[Document],
135+
response_model=APIResponse[Message],
130136
)
131137
def remove_doc(
132138
session: SessionDep,
@@ -138,18 +144,21 @@ def remove_doc(
138144
)
139145

140146
a_crud = OpenAIAssistantCrud(client)
141-
d_crud = DocumentCrud(session, current_user.id)
147+
d_crud = DocumentCrud(session, current_user.project_id)
142148
c_crud = CollectionCrud(session, current_user.id)
143149

144150
document = d_crud.delete(doc_id)
145151
data = c_crud.delete(document, a_crud)
146-
return APIResponse.success_response(data)
152+
153+
return APIResponse.success_response(
154+
Message(message="Document Deleted Successfully")
155+
)
147156

148157

149158
@router.delete(
150159
"/remove/{doc_id}/permanent",
151160
description=load_description("documents/permanent_delete.md"),
152-
response_model=APIResponse[Document],
161+
response_model=APIResponse[Message],
153162
)
154163
def permanent_delete_doc(
155164
session: SessionDep,
@@ -161,9 +170,9 @@ def permanent_delete_doc(
161170
)
162171

163172
a_crud = OpenAIAssistantCrud(client)
164-
d_crud = DocumentCrud(session, current_user.id)
173+
d_crud = DocumentCrud(session, current_user.project_id)
165174
c_crud = CollectionCrud(session, current_user.id)
166-
storage = AmazonCloudStorage(current_user)
175+
storage = AmazonCloudStorage(current_user.project_id)
167176

168177
document = d_crud.read_one(doc_id)
169178

@@ -172,19 +181,31 @@ def permanent_delete_doc(
172181
storage.delete(document.object_store_url)
173182
d_crud.delete(doc_id)
174183

175-
return APIResponse.success_response(document)
184+
return APIResponse.success_response(
185+
Message(message="Document Permanently Deleted Successfully")
186+
)
176187

177188

178189
@router.get(
179190
"/info/{doc_id}",
180191
description=load_description("documents/info.md"),
181-
response_model=APIResponse[Document],
192+
response_model=APIResponse[DocumentPublic],
182193
)
183194
def doc_info(
184195
session: SessionDep,
185-
current_user: CurrentUser,
196+
current_user: CurrentUserOrgProject,
186197
doc_id: UUID = FastPath(description="Document to retrieve"),
198+
include_url: bool = Query(
199+
False, description="Include a signed URL to access the document"
200+
),
187201
):
188-
crud = DocumentCrud(session, current_user.id)
189-
data = crud.read_one(doc_id)
190-
return APIResponse.success_response(data)
202+
crud = DocumentCrud(session, current_user.project_id)
203+
document = crud.read_one(doc_id)
204+
205+
doc_schema = DocumentPublic.model_validate(document, from_attributes=True)
206+
207+
if include_url:
208+
storage = AmazonCloudStorage(current_user.project_id)
209+
doc_schema.signed_url = storage.get_signed_url(document.object_store_url)
210+
211+
return APIResponse.success_response(doc_schema)

backend/app/core/cloud/storage.py

Lines changed: 41 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from botocore.exceptions import ClientError
1111
from botocore.response import StreamingBody
1212

13-
from app.api.deps import CurrentUser
13+
from app.models import UserProjectOrg
1414
from app.core.config import settings
1515
from app.utils import mask_string
1616

@@ -108,8 +108,8 @@ def from_url(cls, url: str):
108108

109109

110110
class CloudStorage:
111-
def __init__(self, user: CurrentUser):
112-
self.user = user
111+
def __init__(self, project_id: int):
112+
self.project_id = project_id
113113

114114
def put(self, source: UploadFile, basename: str):
115115
raise NotImplementedError()
@@ -119,12 +119,11 @@ def stream(self, url: str) -> StreamingBody:
119119

120120

121121
class AmazonCloudStorage(CloudStorage):
122-
def __init__(self, user: CurrentUser):
123-
super().__init__(user)
122+
def __init__(self, project_id: int):
123+
super().__init__(project_id)
124124
self.aws = AmazonCloudStorageClient()
125125

126-
def put(self, source: UploadFile, basename: Path) -> SimpleStorageName:
127-
key = Path(str(self.user.id), basename)
126+
def put(self, source: UploadFile, key: Path) -> SimpleStorageName:
128127
destination = SimpleStorageName(str(key))
129128
kwargs = asdict(destination)
130129

@@ -138,12 +137,12 @@ def put(self, source: UploadFile, basename: Path) -> SimpleStorageName:
138137
)
139138
logger.info(
140139
f"[AmazonCloudStorage.put] File uploaded successfully | "
141-
f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(destination.Bucket)}', 'key': '{mask_string(destination.Key)}'}}"
140+
f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(destination.Bucket)}', 'key': '{mask_string(destination.Key)}'}}"
142141
)
143142
except ClientError as err:
144143
logger.error(
145144
f"[AmazonCloudStorage.put] AWS upload error | "
146-
f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(destination.Bucket)}', 'key': '{mask_string(destination.Key)}', 'error': '{str(err)}'}}",
145+
f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(destination.Bucket)}', 'key': '{mask_string(destination.Key)}', 'error': '{str(err)}'}}",
147146
exc_info=True,
148147
)
149148
raise CloudStorageError(f'AWS Error: "{err}"') from err
@@ -157,13 +156,13 @@ def stream(self, url: str) -> StreamingBody:
157156
body = self.aws.client.get_object(**kwargs).get("Body")
158157
logger.info(
159158
f"[AmazonCloudStorage.stream] File streamed successfully | "
160-
f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}'}}"
159+
f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}'}}"
161160
)
162161
return body
163162
except ClientError as err:
164163
logger.error(
165164
f"[AmazonCloudStorage.stream] AWS stream error | "
166-
f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}",
165+
f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}",
167166
exc_info=True,
168167
)
169168
raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err
@@ -177,13 +176,40 @@ def get_file_size_kb(self, url: str) -> float:
177176
size_kb = round(size_bytes / 1024, 2)
178177
logger.info(
179178
f"[AmazonCloudStorage.get_file_size_kb] File size retrieved successfully | "
180-
f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'size_kb': {size_kb}}}"
179+
f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'size_kb': {size_kb}}}"
181180
)
182181
return size_kb
183182
except ClientError as err:
184183
logger.error(
185184
f"[AmazonCloudStorage.get_file_size_kb] AWS head object error | "
186-
f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}",
185+
f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}",
186+
exc_info=True,
187+
)
188+
raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err
189+
190+
def get_signed_url(self, url: str, expires_in: int = 3600) -> str:
    """
    Build a presigned ``get_object`` URL for a stored file.

    :param url: Storage URL of the object; parsed with ``SimpleStorageName.from_url``
    :param expires_in: Lifetime of the signed URL in seconds (default: 3600)
    :return: The presigned URL
    :raises CloudStorageError: when the client raises ``ClientError``
    """
    target = SimpleStorageName.from_url(url)
    request_params = {"Bucket": target.Bucket, "Key": target.Key}

    try:
        presigned = self.aws.client.generate_presigned_url(
            "get_object",
            Params=request_params,
            ExpiresIn=expires_in,
        )
    except ClientError as err:
        # Log with masked bucket/key, then surface a storage-level error.
        logger.error(
            f"[AmazonCloudStorage.get_signed_url] AWS presign error | "
            f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(target.Bucket)}', 'key': '{mask_string(target.Key)}', 'error': '{str(err)}'}}",
            exc_info=True,
        )
        raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err

    logger.info(
        f"[AmazonCloudStorage.get_signed_url] Signed URL generated | "
        f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(target.Bucket)}', 'key': '{mask_string(target.Key)}'}}"
    )
    return presigned
@@ -195,12 +221,12 @@ def delete(self, url: str) -> None:
195221
self.aws.client.delete_object(**kwargs)
196222
logger.info(
197223
f"[AmazonCloudStorage.delete] File deleted successfully | "
198-
f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}'}}"
224+
f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}'}}"
199225
)
200226
except ClientError as err:
201227
logger.error(
202228
f"[AmazonCloudStorage.delete] AWS delete error | "
203-
f"{{'user_id': '{self.user.id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}",
229+
f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}",
204230
exc_info=True,
205231
)
206232
raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err

0 commit comments

Comments
 (0)