Teamable-Analytics · Opeyem1a · Jan 20, 2024 · Jan 20, 2024 · Jan 20, 2024 · Jan 20, 2024
@@ -0,0 +1,3 @@
+[submodule "api/ai/group_matcher_algorithm/group-matcher"]
+	path = api/ai/group_matcher_algorithm/group-matcher
+	url = https://github.com/ketphan02/group-matcher.git
@@ -7,6 +7,7 @@
     WeightAlgorithmConfig,
     SocialAlgorithmConfig,
     PriorityAlgorithmConfig,
+    GroupMatcherAlgorithmConfig,
 )
 from api.ai.interfaces.algorithm_options import (
     RandomAlgorithmOptions,
@@ -16,6 +17,7 @@
     MultipleRoundRobinAlgorithmOptions,
     GeneralizedEnvyGraphAlgorithmOptions,
     DoubleRoundRobinAlgorithmOptions,
+    GroupMatcherAlgorithmOptions,
 )
 from api.ai.interfaces.team_generation_options import TeamGenerationOptions
 from api.ai.multiple_round_robin_with_adjusted_winner_algorithm.mrr_algorithm import (
@@ -28,6 +30,7 @@
 from api.ai.double_round_robin_algorithm.double_round_robin_algorithm import (
     DoubleRoundRobinAlgorithm,
 )
+from api.ai.group_matcher_algorithm.group_matcher_algorithm import GroupMatcherAlgorithm
 from api.models.enums import AlgorithmType
 from api.models.student import Student
 from api.models.team_set import TeamSet
@@ -78,6 +81,8 @@ def get_algorithm_from_type(algorithm_type: AlgorithmType):
             return GeneralizedEnvyGraphAlgorithm
         if algorithm_type == AlgorithmType.DRR:
             return DoubleRoundRobinAlgorithm
+        if algorithm_type == AlgorithmType.GROUP_MATCHER:
+            return GroupMatcherAlgorithm
 
         raise NotImplementedError(
             f"Algorithm type {algorithm_type} is not associated with an algorithm class!"
@@ -99,6 +104,8 @@ def get_algorithm_option_class(algorithm_type: AlgorithmType):
             return GeneralizedEnvyGraphAlgorithmOptions
         if algorithm_type == AlgorithmType.DRR:
             return DoubleRoundRobinAlgorithmOptions
+        if algorithm_type == AlgorithmType.GROUP_MATCHER:
+            return GroupMatcherAlgorithmOptions
 
         raise NotImplementedError(
             f"Algorithm type {algorithm_type} is not associated with an algorithm options class!"
@@ -120,6 +127,8 @@ def get_algorithm_config_class(algorithm_type: AlgorithmType):
             return None
         if algorithm_type == AlgorithmType.DRR:
             return None
+        if algorithm_type == AlgorithmType.GROUP_MATCHER:
+            return GroupMatcherAlgorithmConfig
 
         raise NotImplementedError(
             f"Algorithm type {algorithm_type} is not associated with an algorithm config class!"

@@ -0,0 +1,78 @@
+import csv
+import os
+import subprocess
+from pathlib import Path
+from typing import List
+
+import pandas as pd
+
+from api.ai.interfaces.algorithm import Algorithm
+from api.ai.interfaces.algorithm_config import GroupMatcherAlgorithmConfig
+from api.ai.interfaces.algorithm_options import GroupMatcherAlgorithmOptions
+from api.ai.interfaces.team_generation_options import TeamGenerationOptions
+from api.models.enums import ScenarioAttribute, fromAlRaceToRace, fromAlGenderToGender
+from api.models.student import Student
+from api.models.team import Team
+from api.models.team_set import TeamSet
+
+
+class GroupMatcherAlgorithm(Algorithm):
+    """Team generation that delegates to the external group-matcher script.
+
+    ``prepare`` exports the students to a semicolon-delimited CSV, and
+    ``generate`` runs the script as a subprocess and turns the CSV it leaves
+    at ``out-private.csv`` (in the current working directory) into team
+    assignments.
+    """
+
+    def __init__(
+        self,
+        algorithm_options: GroupMatcherAlgorithmOptions,
+        team_generation_options: TeamGenerationOptions,
+        algorithm_config: GroupMatcherAlgorithmConfig,
+        *args,
+        **kwargs,
+    ):
+        """Set up the file locations used to talk to the group-matcher script.
+
+        Args:
+            algorithm_options: Forwarded unchanged to the base class.
+            team_generation_options: Forwarded unchanged to the base class.
+            algorithm_config: Supplies ``csv_input_path`` (where the student
+                CSV is written) and ``group_matcher_run_path`` (the script
+                to execute).
+            *args: Accepted and ignored.
+            **kwargs: Accepted and ignored.
+        """
+        super().__init__(algorithm_options, team_generation_options, algorithm_config)
+        # Coerce so the ``.parent`` access below also works for plain strings.
+        self.csv_input_path = Path(algorithm_config.csv_input_path)
+        self.group_matcher_run_path = algorithm_config.group_matcher_run_path
+        self.outpath = Path.cwd() / "out-private.csv"
+        # Drop a result left over from an earlier run so it cannot be mistaken
+        # for the output of this one.
+        if self.outpath.exists():
+            self.outpath.unlink()
+        # The config module is looked up next to the run script.
+        self.config_file_path = (
+            Path(self.group_matcher_run_path).parent / "example_config.py"
+        )
+
+        # NOTE(review): ``self.teams`` is presumably populated by the base
+        # class constructor - confirm.
+        self.team_trace = {team.id: team for team in self.teams}
+
+        # ``exist_ok`` avoids the race between an existence check and mkdir.
+        self.csv_input_path.parent.mkdir(parents=True, exist_ok=True)
+
+    def prepare(self, students: List[Student]) -> None:
+        """Write the students to ``csv_input_path`` as a ``;``-delimited CSV.
+
+        The header is taken from the keys of the first student's
+        ``to_group_matcher_data_format()`` result.
+
+        Raises:
+            ValueError: If ``students`` is empty, since no header can be
+                derived.
+        """
+        student_data = [student.to_group_matcher_data_format() for student in students]
+        if not student_data:
+            raise ValueError(
+                "Cannot prepare group matcher input: no students were provided."
+            )
+        # newline="" lets the csv module control line endings itself.
+        with open(self.csv_input_path, "w", newline="") as csvfile:
+            writer = csv.DictWriter(
+                csvfile, fieldnames=list(student_data[0]), delimiter=";"
+            )
+            writer.writeheader()
+            writer.writerows(student_data)
+
+    def generate(self, students: List[Student]) -> TeamSet:
+        """Run the group-matcher script and build teams from its output CSV.
+
+        ``students`` is not read here; the students placed on teams are
+        rebuilt from the rows of the output file.
+
+        Raises:
+            RuntimeError: If the script exits with a non-zero status.
+        """
+        # An argument list (no shell) keeps paths with spaces or shell
+        # metacharacters from being re-interpreted.
+        cmd = [
+            "python3",
+            str(self.group_matcher_run_path),
+            str(self.config_file_path),
+            str(self.csv_input_path),
+        ]
+        completed = subprocess.run(cmd)
+        if completed.returncode != 0:
+            raise RuntimeError(
+                f"Group matcher exited with status {completed.returncode}: "
+                f"{' '.join(cmd)}"
+            )
+
+        # Read the output csv file and create a TeamSet
+        df = pd.read_csv(self.outpath)
+        for _, row in df.iterrows():
+            new_student = Student(
+                _id=row["sid"],
+                name=row["first_name"] + " " + row["last_name"],
+                attributes={
+                    ScenarioAttribute.YEAR_LEVEL.value: [int(row["year"])],
+                    ScenarioAttribute.RACE.value: [
+                        fromAlRaceToRace(int(row["race"])).value
+                    ],
+                    ScenarioAttribute.GENDER.value: [
+                        fromAlGenderToGender(int(row["gender"])).value
+                    ],
+                    ScenarioAttribute.TIMESLOT_AVAILABILITY.value: (
+                        self._parse_timeslots(row["disc_times_options"])
+                    ),
+                },
+            )
+
+            # NOTE(review): assumes ``group_num`` is zero-based while team ids
+            # start at 1 - confirm against the team setup.
+            self.team_trace[int(row["group_num"]) + 1].add_student(new_student)
+
+        return TeamSet(teams=self.teams)
+
+    @staticmethod
+    def _parse_timeslots(raw) -> List[int]:
+        """Parse a serialised list such as ``"[1, 2]"`` into a list of ints.
+
+        Brackets, quotes and spaces around each comma-separated token are
+        dropped and empty tokens are skipped, so ``"[]"`` yields ``[]``.
+        """
+        tokens = (token.strip(" []'\"") for token in str(raw).split(","))
+        return [int(token) for token in tokens if token]
@@ -1,5 +1,6 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
+from pathlib import Path
 from typing import Callable, Tuple, List
 
 from api.models.student import Student
@@ -47,10 +48,10 @@ def validate(self):
 
 @dataclass
 class PriorityAlgorithmConfig(AlgorithmConfig):
-    MAX_KEEP: int = 3  # nodes
-    MAX_SPREAD: int = 3  # nodes
-    MAX_ITERATE: int = 1500  # iterations
-    MAX_TIME: int = 30  # seconds
+    MAX_KEEP: int = 15  # nodes
+    MAX_SPREAD: int = 30  # nodes
+    MAX_ITERATE: int = 30  # iterations
+    MAX_TIME: int = 10000000  # seconds
 
     """
     Specifies the mutations as a list of [mutation_function, number_team_sets_generated_this_way]
@@ -86,19 +87,44 @@ def validate(self):
 class MultipleRoundRobinAlgorithmConfig(AlgorithmConfig):
+    """Configuration holding the utility function for this algorithm."""
+
+    # Callable taking a Student and a TeamShell and returning a float.
     utility_function: Callable[[Student, TeamShell], float]
 
+    def __init__(self, utility_function: Callable[[Student, TeamShell], float]):
+        """Initialise the base config, then store ``utility_function``."""
+        super().__init__()
+        self.utility_function = utility_function
+
     def validate(self):
+        """Run the base class validation; no extra checks are added here."""
         super().validate()
 
 
 class DoubleRoundRobinAlgorithmConfig(AlgorithmConfig):
+    """Configuration holding the utility function for this algorithm."""
+
+    # Callable taking a Student and a TeamShell and returning a float.
     utility_function: Callable[[Student, TeamShell], float]
 
+    def __init__(self, utility_function: Callable[[Student, TeamShell], float]):
+        """Initialise the base config, then store ``utility_function``."""
+        super().__init__()
+        self.utility_function = utility_function
+
     def validate(self):
+        """Run the base class validation; no extra checks are added here."""
         super().validate()
 
 
 class GeneralizedEnvyGraphAlgorithmConfig(AlgorithmConfig):
+    """Configuration holding the utility function for this algorithm."""
+
+    # Callable taking a Student and a TeamShell and returning a float.
     utility_function: Callable[[Student, TeamShell], float]
 
+    def __init__(self, utility_function: Callable[[Student, TeamShell], float]):
+        """Initialise the base config, then store ``utility_function``."""
+        super().__init__()
+        self.utility_function = utility_function
+
+    def validate(self):
+        """Run the base class validation; no extra checks are added here."""
+        super().validate()
+
+
+class GroupMatcherAlgorithmConfig(AlgorithmConfig):
+    """File locations needed to run the external group-matcher script."""
+
+    # CSV file that GroupMatcherAlgorithm.prepare writes the students to.
+    csv_input_path: Path
+    # Script that GroupMatcherAlgorithm.generate executes with python3.
+    group_matcher_run_path: Path
+
+    def __init__(self, csv_output_path: Path, group_matcher_run_path: Path):
+        """Store both locations as ``Path`` objects.
+
+        Args:
+            csv_output_path: Stored as ``csv_input_path``. The parameter keeps
+                its original name so existing keyword callers keep working,
+                even though the file serves as the script's *input*.
+            group_matcher_run_path: Path of the script to run.
+        """
+        super().__init__()
+        # Coerce so callers may pass plain strings; the consuming algorithm
+        # calls ``.parent`` on these values.
+        self.csv_input_path = Path(csv_output_path)
+        self.group_matcher_run_path = Path(group_matcher_run_path)
+
     def validate(self):
+        """Run the base class validation; no extra checks are added here."""
         super().validate()
@@ -295,6 +295,20 @@ def get_schema() -> Schema:
         raise NotImplementedError
 
 
+@dataclass
+class GroupMatcherAlgorithmOptions(AlgorithmOptions):
+    """Options for the group matcher algorithm; declares no fields of its own."""
+
+    def validate(self):
+        """Run the base class validation; no extra checks are added here."""
+        super().validate()
+
+    @staticmethod
+    def parse_json(_: Dict[str, Any]):
+        """Not implemented: always raises ``NotImplementedError``."""
+        raise NotImplementedError
+
+    @staticmethod
+    def get_schema() -> Schema:
+        """Not implemented: always raises ``NotImplementedError``."""
+        raise NotImplementedError
+
+
 AnyAlgorithmOptions = Union[
     RandomAlgorithmOptions,
     WeightAlgorithmOptions,
@@ -303,4 +317,5 @@ def get_schema() -> Schema:
     MultipleRoundRobinAlgorithmOptions,
     GeneralizedEnvyGraphAlgorithmOptions,
     DoubleRoundRobinAlgorithmOptions,
+    GroupMatcherAlgorithmOptions,
 ]
@@ -38,6 +38,7 @@ class AlgorithmType(Enum):
     MRR = "multiple_round_robin_with_adjusted_winner"
     GEG = "generalized_envy_graph"
     DRR = "double_round_robin"
+    GROUP_MATCHER = "group_matcher"
 
 
 class TokenizationConstraintDirection(Enum):
@@ -124,3 +125,122 @@ class PriorityType(Enum):
     PROJECT_PREFERENCE = "project_preference"
     PROJECT_REQUIREMENT = "project_requirement"
     SOCIAL_PREFERENCE = "social_preference"
+
+
+# From paper: https://sigcse2023.sigcse.org/details/sigcse-ts-2023-papers/163/Inclusive-study-group-formation-at-scale
+class AlRace(AttributeValueEnum):
+    """Race categories used on the group-matcher side of the conversion.
+
+    The declaration order matches the integer codes 0-6 accepted by
+    ``fromAlRaceToRace``.
+    """
+
+    White = "White"
+    Asian = "Asian"
+    Hispanic = "Hispanic"
+    Black_Or_African_American = "Black/African American"
+    # NOTE(review): "Indegenous" is presumably a misspelling of "Indigenous".
+    # Left as is because the member name and the value are both used by the
+    # converters; confirm what the external tool expects before renaming.
+    Indegenous = "Indegenous"
+    Middle_Eastern = "Middle-Eastern"
+    Multiple_Races = "Multiple races"
+
+
+class AlYearLevel(AttributeValueEnum):
+    """Year-level labels used on the group-matcher side of the conversion.
+
+    ``fromYearLevelToAlYearLevel`` maps 0-3 to the first four members in
+    order and every other value to ``Graduate``.
+    """
+
+    Freshman = "freshman"
+    Sophomore = "sophomore"
+    Junior = "junior"
+    Senior = "senior"
+    Graduate = "graduate"
+
+
+class AlGender(AttributeValueEnum):
+    """Gender labels used on the group-matcher side of the conversion."""
+
+    Female = "Female"
+    Male = "Male"
+    # Catch-all: fromGenderToAlGender returns it for anything not MALE/FEMALE.
+    Other = "Other"
+
+
+def fromGenderToAlGender(gender: Gender) -> AlGender:
+    """Convert a ``Gender`` into the matching ``AlGender``.
+
+    ``Gender.MALE`` and ``Gender.FEMALE`` map to their namesakes; any other
+    value maps to ``AlGender.Other``.
+    """
+    if gender == Gender.MALE:
+        return AlGender.Male
+    return AlGender.Female if gender == Gender.FEMALE else AlGender.Other
+
+
+def fromAlGenderToGender(alGenderNum: "int | str") -> Gender:
+    """Convert a group-matcher gender code or label into a ``Gender``.
+
+    Accepts either the integer code (0 or 1) or the ``AlGender`` label
+    ("Female" or "Male"); anything else maps to ``Gender.OTHER``.
+
+    The integer codes are unchanged from before. The labels were swapped
+    ("Male" returned ``Gender.FEMALE``), which contradicted
+    ``fromGenderToAlGender``; they now line up with the ``AlGender``
+    declaration order (Female, Male).
+    """
+    if alGenderNum == 0 or alGenderNum == "Female":
+        return Gender.FEMALE
+    if alGenderNum == 1 or alGenderNum == "Male":
+        return Gender.MALE
+    return Gender.OTHER
+
+
+def fromRaceToAlRace(race: Race) -> AlRace:
+    """Convert a ``Race`` into the matching ``AlRace``.
+
+    The three Asian ``Race`` members all collapse into ``AlRace.Asian``.
+    A value that matches none of the listed members yields ``None``.
+    """
+    pairs = (
+        (Race.European, AlRace.White),
+        (Race.South_Asian, AlRace.Asian),
+        (Race.East_Asian, AlRace.Asian),
+        (Race.South_East_Asian, AlRace.Asian),
+        (Race.Hispanic_or_Latin_American, AlRace.Hispanic),
+        (Race.African, AlRace.Black_Or_African_American),
+        (Race.First_Nations_or_Indigenous, AlRace.Indegenous),
+        (Race.Middle_Eastern, AlRace.Middle_Eastern),
+        (Race.Other, AlRace.Multiple_Races),
+    )
+    # Compare in the listed order and stop at the first match.
+    for source, target in pairs:
+        if race == source:
+            return target
+    return None
+
+
+def fromAlRaceToRace(alRaceNum: "int | str") -> Race:
+    """Convert a group-matcher race code or label into a ``Race``.
+
+    Accepts either the integer code (0-6) or the corresponding ``AlRace``
+    label. "Asian" always maps to ``Race.South_Asian``, so a round trip
+    through ``fromRaceToAlRace`` loses the East / South-East distinction.
+
+    Returns:
+        The matching ``Race``, or ``None`` when the input is not a known
+        code or label.
+    """
+    table = (
+        (0, "White", Race.European),
+        (1, "Asian", Race.South_Asian),
+        (2, "Hispanic", Race.Hispanic_or_Latin_American),
+        (3, "Black/African American", Race.African),
+        (4, "Indegenous", Race.First_Nations_or_Indigenous),
+        (5, "Middle-Eastern", Race.Middle_Eastern),
+        (6, "Multiple races", Race.Other),
+    )
+    for code, label, race in table:
+        if alRaceNum == code or alRaceNum == label:
+            return race
+    # Unknown input: kept as an explicit None so existing callers see the
+    # same result as before.
+    return None
+
+
+def fromYearLevelToAlYearLevel(yearLevel: int) -> AlYearLevel:
+    """Convert a numeric year level into an ``AlYearLevel``.
+
+    0-3 map to Freshman, Sophomore, Junior and Senior; every other value
+    maps to ``AlYearLevel.Graduate``.
+    """
+    ordered_levels = (
+        AlYearLevel.Freshman,
+        AlYearLevel.Sophomore,
+        AlYearLevel.Junior,
+        AlYearLevel.Senior,
+    )
+    for position, level in enumerate(ordered_levels):
+        if yearLevel == position:
+            return level
+    return AlYearLevel.Graduate
+
+
+def fromAlYearLevelToYearLevel(alYearLevel: str) -> int:
+    """Convert a year-level label into its numeric year level.
+
+    The match is a case-insensitive substring test, checked in the order
+    freshman (0), sophomore (1), junior (2), senior (3); a label containing
+    none of them yields 4.
+    """
+    keywords = ("freshman", "sophomore", "junior", "senior")
+    lowered = alYearLevel.lower()
+    for level, keyword in enumerate(keywords):
+        if keyword in lowered:
+            return level
+    return 4
+
+
+def fromNumbersToTimeSlots(numbers: List[int]) -> List[str]:
+    """Convert each number with ``fromNumberToTimeslot``, keeping the order."""
+    return list(map(fromNumberToTimeslot, numbers))
+
+
+def fromNumberToTimeslot(number: int) -> str:
+    """Return the timeslot label for ``number``: its ``str()`` form."""
+    timeslot = str(number)
+    return timeslot
+
+
+def fromTimeslotToNumber(timeslot: str) -> int:
+    """Return the number for ``timeslot`` by parsing it with ``int()``."""
+    number = int(timeslot)
+    return number
+
+
+def fromTimeslotsToNumbers(timeslots: List[str]) -> List[int]:
+    """Convert each timeslot with ``fromTimeslotToNumber``, keeping the order."""
+    return list(map(fromTimeslotToNumber, timeslots))