
Commit 7bc9ddd

Merge pull request #1 from SSMIF-Quant/feature/wrappers
added functions that should be implemented
2 parents: ed4285d + ae88219

2 files changed: +98 -6 lines

.github/workflows/lint.yml (3 additions, 3 deletions)

@@ -24,12 +24,12 @@ jobs:
 
       - name: Check formatting
         run: |
-          black clickhouse/ --check
+          black dataloader/ --check
 
       - name: Run linter
         run: |
-          pylint --fail-under=9.5 clickhouse/
+          pylint --fail-under=9.5 dataloader/
 
       - name: Type check
         run: |
-          mypy clickhouse/
+          mypy dataloader/

dataloader/loader.py (95 additions, 3 deletions)

@@ -24,7 +24,7 @@ class DataLoader:
     client: ClassVar[Client] = Manager.get_connection()
 
     @classmethod
-    def get_data(
+    def query(
         cls,
         source: str,
         columns_list: Optional[List[str]] = None,

@@ -165,7 +165,7 @@ def _resolve_columns(
         return ", ".join(selected_cols)
 
     @classmethod
-    def show_tables(cls) -> List[str]:
+    def tables(cls) -> List[str]:
         """
         Returns available tables.
         """

@@ -174,10 +174,102 @@ def show_tables(cls) -> List[str]:
         return df["name"].tolist()
 
     @classmethod
-    def show_table_column(cls, source: str) -> List[str]:
+    def fields(cls, source: str) -> List[str]:
         """
         Returns all columns for a given table.
         """
         query = f"SHOW COLUMNS FROM {cls.database}.{source}"
         df = cls.client.query(query)
         return df["field"].tolist()
+
+    @classmethod
+    def all(cls, source: str) -> pd.DataFrame:
+        """Fetch entire table."""
+        raise NotImplementedError
+
+    @classmethod
+    def columns(cls, source: str) -> pd.DataFrame:
+        """Select specific columns from source."""
+        raise NotImplementedError
+
+    @classmethod
+    def head(cls, source: str, n: int = 10) -> pd.DataFrame:
+        """Get first N rows."""
+        raise NotImplementedError
+
+    @classmethod
+    def paginate(cls, source: str, limit: int, offset: int) -> pd.DataFrame:
+        """Get paginated results."""
+        raise NotImplementedError
+
+    @classmethod
+    def filter(cls, source: str, **kwargs) -> pd.DataFrame:
+        """
+        Simple equality filters. Pass column=value pairs.
+
+        Example:
+            DataLoader.filter('equities', symbol='AAPL', date_start='2024-01-01')
+        """
+        raise NotImplementedError
+
+    @classmethod
+    def match_pattern(cls, source: str, pattern: str) -> List[str]:
+        """Get columns matching a pattern."""
+        raise NotImplementedError
+
+    @classmethod
+    def select_pattern(cls, source: str, pattern: str, **filters) -> pd.DataFrame:
+        """Select columns matching a pattern with optional filters."""
+        raise NotImplementedError
+
+    @classmethod
+    def date_range(
+        cls, source: str, start_date: str, end_date: str, **additional_filters
+    ) -> pd.DataFrame:
+        """Get data between two dates (YYYY-MM-DD format)."""
+        raise NotImplementedError
+
+    @classmethod
+    def first_date(cls, source: str) -> pd.Timestamp:
+        """Return the earliest date in the table."""
+        raise NotImplementedError
+
+    @classmethod
+    def last_date(cls, source: str) -> pd.Timestamp:
+        """Return the latest date in the table."""
+        raise NotImplementedError
+
+    @classmethod
+    def latest(cls, source: str, n: int = 1) -> pd.DataFrame:
+        """Return the last N rows per symbol or table."""
+        raise NotImplementedError
+
+    @classmethod
+    def describe(cls, source: str) -> pd.DataFrame:
+        """Return column types, non-null counts, basic stats."""
+        raise NotImplementedError
+
+    @classmethod
+    def column_types(cls, source: str) -> Dict[str, str]:
+        """Return data types for each column in a table."""
+        raise NotImplementedError
+
+    @classmethod
+    def stream(cls, source: str, batch_size: int = 10000):
+        """
+        Yield data in chunks of batch_size.
+        Example usage:
+            for df_chunk in DataLoader.stream('equities', 5000):
+                process(df_chunk)
+        """
+        raise NotImplementedError
+
+    @classmethod
+    def iter_chunks(cls, source: str, chunk_size: int = 10000):
+        """Alias for stream."""
+        return cls.stream(source, batch_size=chunk_size)
+
+    @classmethod
+    def batch_query(cls, sources: List[str], filters: Optional[Dict[str, Any]] = None):
+        """Query multiple tables or symbols in a single call."""
+        raise NotImplementedError
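The new wrappers are declared but left as NotImplementedError stubs. Below is a minimal, hypothetical sketch of how a few of them could later be filled in; it is not part of this commit. It assumes that cls.client.query(sql) returns a pandas DataFrame (as the existing tables() and fields() helpers suggest), that cls.database names the target database, and that source names and filter values come from trusted callers (no escaping or parameterization is shown). The DataLoaderSketch class and its attribute defaults are stand-ins for illustration only.

from typing import Dict

import pandas as pd


class DataLoaderSketch:
    """Hypothetical stand-in for DataLoader; all bodies are sketches, not the commit's code."""

    database: str = "default"  # assumption: set on the real DataLoader
    client = None              # assumption: Manager.get_connection() on the real DataLoader

    @classmethod
    def head(cls, source: str, n: int = 10) -> pd.DataFrame:
        # First N rows of a table.
        return cls.client.query(f"SELECT * FROM {cls.database}.{source} LIMIT {n}")

    @classmethod
    def paginate(cls, source: str, limit: int, offset: int) -> pd.DataFrame:
        # One page of rows using LIMIT/OFFSET.
        return cls.client.query(
            f"SELECT * FROM {cls.database}.{source} LIMIT {limit} OFFSET {offset}"
        )

    @classmethod
    def filter(cls, source: str, **kwargs) -> pd.DataFrame:
        # Simple equality filters; string values are quoted naively (no escaping).
        conditions = " AND ".join(
            f"{col} = '{val}'" if isinstance(val, str) else f"{col} = {val}"
            for col, val in kwargs.items()
        )
        where = f" WHERE {conditions}" if conditions else ""
        return cls.client.query(f"SELECT * FROM {cls.database}.{source}{where}")

    @classmethod
    def column_types(cls, source: str) -> Dict[str, str]:
        # Map column name -> declared type, assuming the SHOW COLUMNS result exposes
        # "field" and "type" columns (fields() above already relies on "field").
        df = cls.client.query(f"SHOW COLUMNS FROM {cls.database}.{source}")
        return dict(zip(df["field"], df["type"]))

    @classmethod
    def stream(cls, source: str, batch_size: int = 10000):
        # Yield the table in LIMIT/OFFSET chunks until a short or empty batch is returned.
        offset = 0
        while True:
            chunk = cls.paginate(source, limit=batch_size, offset=offset)
            if chunk.empty:
                break
            yield chunk
            if len(chunk) < batch_size:
                break
            offset += batch_size

Building stream() on top of paginate() keeps the two consistent, but plain LIMIT/OFFSET paging is only stable when the table has a deterministic order (or an explicit ORDER BY is added); the commit itself does not pin down that choice.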
