Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ postgres_data/
*.key
*.pub

*/**/.rpc_secret

# Sqlite extensions
*.sqlite
*.sqlite-shm
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ version: '3.7'

services:
app:
# platform: linux/x86_64
build: service_app/
depends_on:
- db
Expand Down
1,001 changes: 1,001 additions & 0 deletions experiments/Restaurant_Reviews.tsv

Large diffs are not rendered by default.

2,175 changes: 2,175 additions & 0 deletions experiments/Untitled.ipynb

Large diffs are not rendered by default.

15 changes: 9 additions & 6 deletions service_app/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,21 @@ ENV PORT=4000
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

FROM python:3.10-alpine AS build-image
FROM python:3.10-slim-buster AS build-image
WORKDIR /app
COPY --from=compile-image /opt/venv /opt/venv
RUN apk upgrade --no-cache && \
apk add --no-cache postgresql-client bash openssl libgcc libstdc++ ncurses-libs
# COPY ./src .
# RUN apk upgrade --no-cache && \
# apk add --no-cache postgresql-client bash openssl libgcc libstdc++ ncurses-libs
# COPY .env .

# Make sure we use the virtualenv:
ENV PATH="/opt/venv/bin:$PATH"
# ENV DATABASE_URL="sqlite://db.sqlite"
# EXPOSE 4000
#
CMD uvicorn app.api:app --reload --host 0.0.0.0 --port $PORT

# CMD uvicorn app.api:app --reload --host 0.0.0.0 --port $PORT
#
# FROM python:3.10
# COPY requirements.txt .
# RUN pip install --no-cache-dir -r requirements.txt
# COPY ./src .
3 changes: 3 additions & 0 deletions service_app/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,6 @@ python-multipart
facebook-scraper
firebase-admin<7
pyrebase4
joblib<=1.1.0
bertopic
textherox==1.2.0  # NOTE(review): code does `import texthero as hero` — confirm this pin provides the `texthero` module (possible typo for texthero)
95 changes: 83 additions & 12 deletions service_app/src/app/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
DatapointOut_Pydantic,
)

from .db.schemas import DualResourceActionResponse, SingleResourceActionResponse, CreateDatasetBody, SignUpRequestBody, SignInRequestBody
from .db.schemas import DualResourceActionResponse, SingleResourceActionResponse, CreateDatasetBody, SignUpRequestBody, SignInRequestBody, LineByLineTextInput, VerifyTokenInput
from .auth.security import get_password_hash
from tortoise.contrib.fastapi import register_tortoise
from dotenv import load_dotenv
Expand All @@ -34,15 +34,19 @@
import pyrebase
import json

from typing import List
from typing import List, Dict
from bertopic import BERTopic
import texthero as hero
import pandas as pd

load_dotenv() # take environment variables from .env.

app = FastAPI()

# Allowed origins
origins = [
"http://localhost:3000",
"http://127.0.0.1:3333",
"http://localhost:3333",
]

app.add_middleware(
Expand All @@ -64,6 +68,11 @@

# Setup supabase authentication

def api_authen(token):
    """Check whether *token* is a valid, authenticated Supabase JWT.

    Args:
        token: raw JWT string taken from the client request body.

    Returns:
        True when Supabase resolves the token to a user whose audience
        claim is "authenticated"; False otherwise — including when the
        token is invalid or expired and the client call raises.
    """
    try:
        user = supabase.auth.api.get_user(jwt=token)
    except Exception:
        # An invalid/expired token makes the Supabase client raise;
        # treat that as "not authenticated" instead of bubbling a 500.
        return False
    # `user.aud and user.aud == "authenticated"` collapses to a plain
    # equality check — a falsy aud can never equal "authenticated".
    return user.aud == "authenticated"

# signup endpoint
@app.post("/signup")
Expand All @@ -89,6 +98,31 @@ async def signup(req: SignUpRequestBody):
except Exception as e:
return HTTPException(detail={'message': str(e)}, status_code=400)

@app.post("/api/v1/user")
async def get_user_base_on_token(req: VerifyTokenInput):
    """Resolve a Supabase JWT into the matching user's audience claim and email.

    NOTE(review): no error handling — an invalid token will raise inside the
    Supabase client; confirm whether a 500 is acceptable here.
    """
    user = supabase.auth.api.get_user(jwt=req.token)
    return {"aud": user.aud, "email": user.email }

# Only required email for signup at this time
# password: str =
#
# if email is None or password is None:
# return HTTPException(detail={'message': 'Error! Missing Email or Password'}, status_code=400)
# try:
# user = auth.create_user(
# email=email,
# password=password
# )
# return JSONResponse(content={'message': f'Successfully created user {user.uid}'}, status_code=200)
# except Exception as e:
# return HTTPException(detail={'message': str(e)}, status_code=400)

try:
user: Dict[str, Any] = supabase.auth.sign_up(email=email)
return JSONResponse(content={'message': f'Successfully created user {user.uid}'}, status_code=200)
except Exception as e:
return HTTPException(detail={'message': str(e)}, status_code=400)

@app.post("/signin")
async def signin(request: SignInRequestBody):
Expand All @@ -107,13 +141,10 @@ async def signin(request: SignInRequestBody):
except Exception as e:
return HTTPException(detail={'message': str(e)}, status_code=400)

@app.post("/ping")
async def validate(request: Request):
headers = request.headers
jwt = headers.get('authorization')
print(f"jwt:{jwt}")
user = auth.verify_id_token(jwt)
return user["uid"]
@app.get("/api/v1/ping")
async def ping(token: str = Depends(oauth2_scheme)):
    """Liveness check; a bearer token must be supplied via the OAuth2 scheme."""
    payload = {"result": "Cool"}
    return payload


@app.post("/login", include_in_schema=False)
async def login(request: SignUpRequestBody):
Expand All @@ -133,11 +164,10 @@ async def read_root():
return "Please go to /docs to read the documentation"

@app.post("/api/v1/workflows", response_model = WorkflowOut_Pydantic)
async def create_workflow(workflow: WorkflowIn_Pydantic, token: str = Depends(check_auth)):
async def create_workflow(workflow: WorkflowIn_Pydantic):
"""
Create a new Workflow
"""
supabase.
workflow_obj = await Workflows.create(**workflow.dict(exclude_unset=True))
return await WorkflowOut_Pydantic.from_tortoise_orm(workflow_obj)

Expand Down Expand Up @@ -265,6 +295,47 @@ async def list_datapoints_from_dataset(dataset_id, token: str = Depends(oauth2_s
return queryset


@app.post("/api/v1/apps/topic_model/process")
async def run_process_clustering(req: VerifyTokenInput, payload: LineByLineTextInput):
    """Cluster newline-separated text into topics with BERTopic.

    Args:
        req: carries the Supabase JWT used to authorize the call.
        payload: `text_data` holds one document per line.

    Returns:
        One entry per discovered topic, in topic-id order:
        {"data": [original sentences], "topic": [(Keyword, weight %)], "count": n}.
        Returns [] when the token is not authenticated, the input is
        empty, or no topic other than the outlier bucket is found.
    """
    # Guard clause instead of wrapping the whole body in `if authenticated:`.
    if not api_authen(req.token):
        return []

    docs = payload.text_data.split("\n")
    if not any(doc.strip() for doc in docs):
        # Nothing to cluster — fit_transform would fail on empty input.
        return []

    # NOTE(review): the model (and its embedding backend) is rebuilt on every
    # request; consider caching it at module level if latency matters.
    topic_model = BERTopic(embedding_model="all-MiniLM-L6-v2")

    # Preprocess with texthero's default cleaning pipeline.
    clean_docs = pd.Series(docs).pipe(hero.clean)

    topics, probs = topic_model.fit_transform(clean_docs)

    # Topic ids are assumed contiguous 0..num_topics; -1 marks the
    # outlier/stopword bucket. `default=-1` keeps max() safe if BERTopic
    # ever returns no assignments.
    num_topics = max(topics, default=-1)
    if num_topics < 0:
        return []
    sentences_by_topics = {topic_id: {"data": [], "topic": []} for topic_id in range(num_topics + 1)}

    # Group the ORIGINAL (uncleaned) sentences by their assigned topic.
    for doc, topic_id in zip(docs, topics):
        # topic "-1" are stopwords/outliers — skip them.
        if topic_id == -1:
            continue
        sentences_by_topics[topic_id]["data"].append(doc)

    for topic_id in range(num_topics + 1):
        # get_topic returns (keyword, score) pairs; convert scores to
        # rounded percentages for the client.
        topic = topic_model.get_topic(topic_id)
        topic = [(t[0].capitalize(), round(t[1] * 100, 2)) for t in topic]
        sentences_by_topics[topic_id]["topic"] = topic
        sentences_by_topics[topic_id]["count"] = len(sentences_by_topics[topic_id]["data"])

    # Drop the ids: the client only needs the per-topic payloads, in order.
    return [topic_items for (_, topic_items) in sentences_by_topics.items()]


# return await Datapoints.all().prefetch_related(Prefetch(dataset, queryset))
#
#### POSTPONED UNTIL HAVE USERS MANAGEMENT
Expand Down
10 changes: 10 additions & 0 deletions service_app/src/app/db/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,13 @@ class SignInRequestBody(BaseModel):
email: str
password: str

class LineByLineTextInput(BaseModel):
    """Request body carrying newline-separated documents.

    Example `text_data` value::

        This is first line\nThis is second line
    """
    # One document per line; consumers split on "\n".
    text_data: str

class VerifyTokenInput(BaseModel):
    """Request body carrying a Supabase-issued JWT to validate."""
    # Raw JWT string as received from the client.
    token: str

2 changes: 1 addition & 1 deletion service_auth/.rpc_secret
Original file line number Diff line number Diff line change
@@ -1 +1 @@
hevenuzchkgl
bhnfcbucmxox
Binary file modified service_auth/auth
Binary file not shown.
2 changes: 1 addition & 1 deletion service_auth/quorum.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
43635c40-6ba3-4f7b-ad96-c31f15ce69af
ecaedbb5-7b61-4955-8c44-89beaccb1273