diff --git a/.semversioner/next-release/patch-20260212211908142161.json b/.semversioner/next-release/patch-20260212211908142161.json new file mode 100644 index 0000000000..2aad5983ce --- /dev/null +++ b/.semversioner/next-release/patch-20260212211908142161.json @@ -0,0 +1,4 @@ +{ + "type": "patch", + "description": "Remove NetworkX dependency from graph utilities; move to DataFrame-based implementations in graphrag.graphs package." +} diff --git a/cspell.config.yaml b/cspell.config.yaml index 587ccdc28a..84768e7fa8 100644 --- a/cspell.config.yaml +++ b/cspell.config.yaml @@ -21,7 +21,7 @@ ignorePaths: - pyproject.toml - entity_extraction.txt - package.json - - tests/fixtures/ + - tests/ - examples_notebooks/inputs/ - docs/examples_notebooks/inputs/ - "*.csv" diff --git a/dictionary.txt b/dictionary.txt index 5d30e2e850..0324937a23 100644 --- a/dictionary.txt +++ b/dictionary.txt @@ -45,6 +45,7 @@ PROPN Syntatic ents INTJ +rels # Libraries Langchain diff --git a/packages/graphrag/graphrag/graphs/__init__.py b/packages/graphrag/graphrag/graphs/__init__.py new file mode 100644 index 0000000000..cbf5b3d8f5 --- /dev/null +++ b/packages/graphrag/graphrag/graphs/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2024 Microsoft Corporation. +# Licensed under the MIT License + +"""Graph utilities that operate on DataFrames instead of NetworkX objects.""" diff --git a/packages/graphrag/graphrag/graphs/compute_degree.py b/packages/graphrag/graphrag/graphs/compute_degree.py new file mode 100644 index 0000000000..ca0f398945 --- /dev/null +++ b/packages/graphrag/graphrag/graphs/compute_degree.py @@ -0,0 +1,43 @@ +# Copyright (c) 2024 Microsoft Corporation. +# Licensed under the MIT License + +"""Compute node degree directly from a relationships DataFrame.""" + +import pandas as pd + + +def compute_degree( + relationships: pd.DataFrame, + source_column: str = "source", + target_column: str = "target", +) -> pd.DataFrame: + """Compute the degree of each node from an edge list DataFrame. 
+ + Degree is the number of edges connected to a node (counting both + source and target appearances). + + Parameters + ---------- + relationships : pd.DataFrame + Edge list with at least source and target columns. + source_column : str + Name of the source node column. + target_column : str + Name of the target node column. + + Returns + ------- + pd.DataFrame + DataFrame with columns ["title", "degree"]. + """ + # Normalize edge direction so (A,B) and (B,A) are treated as the same + # undirected edge, matching NetworkX Graph behavior. + edges = relationships[[source_column, target_column]].copy() + edges["_lo"] = edges.min(axis=1) + edges["_hi"] = edges.max(axis=1) + edges = edges.drop_duplicates(subset=["_lo", "_hi"]) + + source_counts = edges[source_column].value_counts() + target_counts = edges[target_column].value_counts() + degree = source_counts.add(target_counts, fill_value=0).astype(int) + return pd.DataFrame({"title": degree.index, "degree": degree.to_numpy()}) diff --git a/packages/graphrag/graphrag/graphs/connected_components.py b/packages/graphrag/graphrag/graphs/connected_components.py new file mode 100644 index 0000000000..346e7a264b --- /dev/null +++ b/packages/graphrag/graphrag/graphs/connected_components.py @@ -0,0 +1,93 @@ +# Copyright (c) 2024 Microsoft Corporation. +# Licensed under the MIT License + +"""Find connected components and the largest connected component from an edge list DataFrame.""" + +import pandas as pd + + +def connected_components( + relationships: pd.DataFrame, + source_column: str = "source", + target_column: str = "target", +) -> list[set[str]]: + """Return all connected components as a list of node-title sets. + + Uses union-find on the deduplicated edge list. + + Parameters + ---------- + relationships : pd.DataFrame + Edge list with at least source and target columns. + source_column : str + Name of the source node column. + target_column : str + Name of the target node column. 
+ + Returns + ------- + list[set[str]] + Each element is a set of node titles belonging to one component, + sorted by descending component size. + """ + edges = relationships.drop_duplicates(subset=[source_column, target_column]) + + # Initialize every node as its own parent + all_nodes = pd.concat( + [edges[source_column], edges[target_column]], ignore_index=True + ).unique() + parent: dict[str, str] = {node: node for node in all_nodes} + + def find(x: str) -> str: + while parent[x] != x: + parent[x] = parent[parent[x]] # path compression + x = parent[x] + return x + + def union(a: str, b: str) -> None: + ra, rb = find(a), find(b) + if ra != rb: + parent[ra] = rb + + # Union each edge + for src, tgt in zip(edges[source_column], edges[target_column], strict=True): + union(src, tgt) + + # Group by root + groups: dict[str, set[str]] = {} + for node in parent: + root = find(node) + groups.setdefault(root, set()).add(node) + + return sorted(groups.values(), key=len, reverse=True) + + +def largest_connected_component( + relationships: pd.DataFrame, + source_column: str = "source", + target_column: str = "target", +) -> set[str]: + """Return the node titles belonging to the largest connected component. + + Parameters + ---------- + relationships : pd.DataFrame + Edge list with at least source and target columns. + source_column : str + Name of the source node column. + target_column : str + Name of the target node column. + + Returns + ------- + set[str] + The set of node titles in the largest connected component. 
+ """ + components = connected_components( + relationships, + source_column=source_column, + target_column=target_column, + ) + if not components: + return set() + return components[0] diff --git a/packages/graphrag/graphrag/graphs/edge_weights.py b/packages/graphrag/graphrag/graphs/edge_weights.py new file mode 100644 index 0000000000..e2f2b49530 --- /dev/null +++ b/packages/graphrag/graphrag/graphs/edge_weights.py @@ -0,0 +1,101 @@ +# Copyright (c) 2024 Microsoft Corporation. +# Licensed under the MIT License + +"""Edge weight calculation utilities (PMI, RRF).""" + +import numpy as np +import pandas as pd + + +def calculate_pmi_edge_weights( + nodes_df: pd.DataFrame, + edges_df: pd.DataFrame, + node_name_col: str = "title", + node_freq_col: str = "frequency", + edge_weight_col: str = "weight", + edge_source_col: str = "source", + edge_target_col: str = "target", +) -> pd.DataFrame: + """Calculate pointwise mutual information (PMI) edge weights. + + Uses a variant of PMI that accounts for bias towards low-frequency events. + pmi(x,y) = p(x,y) * log2(p(x,y) / (p(x)*p(y))) + p(x,y) = edge_weight(x,y) / total_edge_weights + p(x) = freq_occurrence(x) / total_freq_occurrences.
+ """ + copied_nodes_df = nodes_df[[node_name_col, node_freq_col]] + + total_edge_weights = edges_df[edge_weight_col].sum() + total_freq_occurrences = nodes_df[node_freq_col].sum() + copied_nodes_df["prop_occurrence"] = ( + copied_nodes_df[node_freq_col] / total_freq_occurrences + ) + copied_nodes_df = copied_nodes_df.loc[:, [node_name_col, "prop_occurrence"]] + + edges_df["prop_weight"] = edges_df[edge_weight_col] / total_edge_weights + edges_df = ( + edges_df + .merge( + copied_nodes_df, + left_on=edge_source_col, + right_on=node_name_col, + how="left", + ) + .drop(columns=[node_name_col]) + .rename(columns={"prop_occurrence": "source_prop"}) + ) + edges_df = ( + edges_df + .merge( + copied_nodes_df, + left_on=edge_target_col, + right_on=node_name_col, + how="left", + ) + .drop(columns=[node_name_col]) + .rename(columns={"prop_occurrence": "target_prop"}) + ) + edges_df[edge_weight_col] = edges_df["prop_weight"] * np.log2( + edges_df["prop_weight"] / (edges_df["source_prop"] * edges_df["target_prop"]) + ) + + return edges_df.drop(columns=["prop_weight", "source_prop", "target_prop"]) + + +def calculate_rrf_edge_weights( + nodes_df: pd.DataFrame, + edges_df: pd.DataFrame, + node_name_col: str = "title", + node_freq_col: str = "freq", + edge_weight_col: str = "weight", + edge_source_col: str = "source", + edge_target_col: str = "target", + rrf_smoothing_factor: int = 60, +) -> pd.DataFrame: + """Calculate reciprocal rank fusion (RRF) edge weights. + + Combines PMI weight and combined freq of source and target. 
+ """ + edges_df = calculate_pmi_edge_weights( + nodes_df, + edges_df, + node_name_col, + node_freq_col, + edge_weight_col, + edge_source_col, + edge_target_col, + ) + + edges_df["pmi_rank"] = edges_df[edge_weight_col].rank(method="min", ascending=False) + edges_df["raw_weight_rank"] = edges_df[edge_weight_col].rank( + method="min", ascending=False + ) + edges_df[edge_weight_col] = edges_df.apply( + lambda x: ( + (1 / (rrf_smoothing_factor + x["pmi_rank"])) + + (1 / (rrf_smoothing_factor + x["raw_weight_rank"])) + ), + axis=1, + ) + + return edges_df.drop(columns=["pmi_rank", "raw_weight_rank"]) diff --git a/packages/graphrag/graphrag/graphs/hierarchical_leiden.py b/packages/graphrag/graphrag/graphs/hierarchical_leiden.py new file mode 100644 index 0000000000..ab4e09c67d --- /dev/null +++ b/packages/graphrag/graphrag/graphs/hierarchical_leiden.py @@ -0,0 +1,54 @@ +# Copyright (c) 2024 Microsoft Corporation. +# Licensed under the MIT License + +"""Hierarchical Leiden clustering on edge lists.""" + +from typing import Any + +import graspologic_native as gn + + +def hierarchical_leiden( + edges: list[tuple[str, str, float]], + max_cluster_size: int = 10, + random_seed: int | None = 0xDEADBEEF, +) -> list[gn.HierarchicalCluster]: + """Run hierarchical leiden on an edge list.""" + return gn.hierarchical_leiden( + edges=edges, + max_cluster_size=max_cluster_size, + seed=random_seed, + starting_communities=None, + resolution=1.0, + randomness=0.001, + use_modularity=True, + iterations=1, + ) + + +def first_level_hierarchical_clustering( + hcs: list[gn.HierarchicalCluster], +) -> dict[Any, int]: + """Return the initial leiden clustering as a dict of node id to community id. + + Returns + ------- + dict[Any, int] + The initial leiden algorithm clustering results as a dictionary + of node id to community id. 
+ """ + return {entry.node: entry.cluster for entry in hcs if entry.level == 0} + + +def final_level_hierarchical_clustering( + hcs: list[gn.HierarchicalCluster], +) -> dict[Any, int]: + """Return the final leiden clustering as a dict of node id to community id. + + Returns + ------- + dict[Any, int] + The last leiden algorithm clustering results as a dictionary + of node id to community id. + """ + return {entry.node: entry.cluster for entry in hcs if entry.is_final_cluster} diff --git a/packages/graphrag/graphrag/graphs/modularity.py b/packages/graphrag/graphrag/graphs/modularity.py new file mode 100644 index 0000000000..e121ec1623 --- /dev/null +++ b/packages/graphrag/graphrag/graphs/modularity.py @@ -0,0 +1,295 @@ +# Copyright (c) 2024 Microsoft Corporation. +# Licensed under the MIT License + +"""Compute graph modularity directly from an edge list DataFrame.""" + +import logging +import math +from collections import defaultdict + +import pandas as pd + +from graphrag.config.enums import ModularityMetric +from graphrag.graphs.connected_components import ( + connected_components, + largest_connected_component, +) +from graphrag.graphs.hierarchical_leiden import ( + final_level_hierarchical_clustering, + first_level_hierarchical_clustering, + hierarchical_leiden, +) + +logger = logging.getLogger(__name__) + + +def _df_to_edge_list( + edges: pd.DataFrame, + source_column: str = "source", + target_column: str = "target", + weight_column: str = "weight", +) -> list[tuple[str, str, float]]: + """Convert a relationships DataFrame to a sorted edge list. + + Normalizes direction and deduplicates so each undirected edge appears + once, keeping the last occurrence's weight (matching NX behavior). 
+ """ + df = edges[[source_column, target_column, weight_column]].copy() + lo = df[[source_column, target_column]].min(axis=1) + hi = df[[source_column, target_column]].max(axis=1) + df = df.assign(**{source_column: lo, target_column: hi}) + df = df.drop_duplicates(subset=[source_column, target_column], keep="last") + return sorted( + (str(row[source_column]), str(row[target_column]), float(row[weight_column])) + for _, row in df.iterrows() + ) + + +def modularity( + edges: pd.DataFrame, + partitions: dict[str, int], + source_column: str = "source", + target_column: str = "target", + weight_column: str = "weight", + resolution: float = 1.0, +) -> float: + """Calculate modularity of a graph given community assignments. + + Parameters + ---------- + edges : pd.DataFrame + Edge list with at least source, target, and weight columns. + partitions : dict[str, int] + Mapping from node title to community id. + source_column : str + Name of the source node column. + target_column : str + Name of the target node column. + weight_column : str + Name of the edge weight column. + resolution : float + Resolution parameter for modularity calculation. + + Returns + ------- + float + The modularity score. 
+ """ + components = _modularity_components( + edges, partitions, source_column, target_column, weight_column, resolution + ) + return sum(components.values()) + + +def _modularity_component( + intra_community_degree: float, + total_community_degree: float, + network_degree_sum: float, + resolution: float, +) -> float: + """Compute the modularity contribution of a single community.""" + community_degree_ratio = math.pow(total_community_degree, 2.0) / ( + 2.0 * network_degree_sum + ) + return (intra_community_degree - resolution * community_degree_ratio) / ( + 2.0 * network_degree_sum + ) + + +def _modularity_components( + edges: pd.DataFrame, + partitions: dict[str, int], + source_column: str = "source", + target_column: str = "target", + weight_column: str = "weight", + resolution: float = 1.0, +) -> dict[int, float]: + """Calculate per-community modularity components from an edge list. + + Edges are treated as undirected: direction is normalized and duplicates + are removed (keeping the last occurrence's weight, matching NX behavior). 
+ """ + # Normalize direction and deduplicate so each undirected edge is counted once + df = edges[[source_column, target_column, weight_column]].copy() + lo = df[[source_column, target_column]].min(axis=1) + hi = df[[source_column, target_column]].max(axis=1) + df = df.assign(**{source_column: lo, target_column: hi}) + df = df.drop_duplicates(subset=[source_column, target_column], keep="last") + + total_edge_weight = 0.0 + communities = set(partitions.values()) + + degree_sums_within: dict[int, float] = defaultdict(float) + degree_sums_for: dict[int, float] = defaultdict(float) + + for _, row in df.iterrows(): + src = str(row[source_column]) + tgt = str(row[target_column]) + weight = float(row[weight_column]) + + src_comm = partitions[src] + tgt_comm = partitions[tgt] + + if src_comm == tgt_comm: + if src == tgt: + degree_sums_within[src_comm] += weight + else: + degree_sums_within[src_comm] += weight * 2.0 + + degree_sums_for[src_comm] += weight + degree_sums_for[tgt_comm] += weight + total_edge_weight += weight + + if total_edge_weight == 0.0: + return dict.fromkeys(communities, 0.0) + + return { + comm: _modularity_component( + degree_sums_within[comm], + degree_sums_for[comm], + total_edge_weight, + resolution, + ) + for comm in communities + } + + +def calculate_root_modularity( + edges: pd.DataFrame, + max_cluster_size: int = 10, + random_seed: int = 0xDEADBEEF, +) -> float: + """Calculate modularity of the graph's root clusters.""" + edge_list = _df_to_edge_list(edges) + hcs = hierarchical_leiden( + edge_list, + max_cluster_size=max_cluster_size, + random_seed=random_seed, + ) + root_clusters = first_level_hierarchical_clustering(hcs) + return modularity(edges, root_clusters) + + +def calculate_leaf_modularity( + edges: pd.DataFrame, + max_cluster_size: int = 10, + random_seed: int = 0xDEADBEEF, +) -> float: + """Calculate modularity of the graph's leaf clusters.""" + edge_list = _df_to_edge_list(edges) + hcs = hierarchical_leiden( + edge_list, + 
max_cluster_size=max_cluster_size, + random_seed=random_seed, + ) + leaf_clusters = final_level_hierarchical_clustering(hcs) + return modularity(edges, leaf_clusters) + + +def calculate_graph_modularity( + edges: pd.DataFrame, + max_cluster_size: int = 10, + random_seed: int = 0xDEADBEEF, + use_root_modularity: bool = True, +) -> float: + """Calculate modularity of the whole graph.""" + if use_root_modularity: + return calculate_root_modularity( + edges, max_cluster_size=max_cluster_size, random_seed=random_seed + ) + return calculate_leaf_modularity( + edges, max_cluster_size=max_cluster_size, random_seed=random_seed + ) + + +def calculate_lcc_modularity( + edges: pd.DataFrame, + max_cluster_size: int = 10, + random_seed: int = 0xDEADBEEF, + use_root_modularity: bool = True, +) -> float: + """Calculate modularity of the largest connected component of the graph.""" + lcc_nodes = largest_connected_component(edges) + lcc_edges = edges[edges["source"].isin(lcc_nodes) & edges["target"].isin(lcc_nodes)] + if use_root_modularity: + return calculate_root_modularity( + lcc_edges, max_cluster_size=max_cluster_size, random_seed=random_seed + ) + return calculate_leaf_modularity( + lcc_edges, max_cluster_size=max_cluster_size, random_seed=random_seed + ) + + +def calculate_weighted_modularity( + edges: pd.DataFrame, + max_cluster_size: int = 10, + random_seed: int = 0xDEADBEEF, + min_connected_component_size: int = 10, + use_root_modularity: bool = True, +) -> float: + """Calculate weighted modularity of components larger than *min_connected_component_size*. + + Modularity = sum(component_modularity * component_size) / total_nodes. 
+ """ + components = connected_components(edges) + filtered = [c for c in components if len(c) > min_connected_component_size] + if len(filtered) == 0: + # Fall back to the whole graph + filtered = [set(edges["source"].unique()).union(set(edges["target"].unique()))] + + total_nodes = sum(len(c) for c in filtered) + total_modularity = 0.0 + for component in filtered: + if len(component) > min_connected_component_size: + sub_edges = edges[ + edges["source"].isin(component) & edges["target"].isin(component) + ] + if use_root_modularity: + mod = calculate_root_modularity( + sub_edges, + max_cluster_size=max_cluster_size, + random_seed=random_seed, + ) + else: + mod = calculate_leaf_modularity( + sub_edges, + max_cluster_size=max_cluster_size, + random_seed=random_seed, + ) + total_modularity += mod * len(component) / total_nodes + return total_modularity + + +def calculate_modularity( + edges: pd.DataFrame, + max_cluster_size: int = 10, + random_seed: int = 0xDEADBEEF, + use_root_modularity: bool = True, + modularity_metric: ModularityMetric = ModularityMetric.WeightedComponents, +) -> float: + """Calculate modularity of the graph based on the modularity metric type.""" + match modularity_metric: + case ModularityMetric.Graph: + logger.info("Calculating graph modularity") + return calculate_graph_modularity( + edges, + max_cluster_size=max_cluster_size, + random_seed=random_seed, + use_root_modularity=use_root_modularity, + ) + case ModularityMetric.LCC: + logger.info("Calculating LCC modularity") + return calculate_lcc_modularity( + edges, + max_cluster_size=max_cluster_size, + random_seed=random_seed, + use_root_modularity=use_root_modularity, + ) + case ModularityMetric.WeightedComponents: + logger.info("Calculating weighted-components modularity") + return calculate_weighted_modularity( + edges, + max_cluster_size=max_cluster_size, + random_seed=random_seed, + use_root_modularity=use_root_modularity, + ) diff --git a/packages/graphrag/graphrag/graphs/stable_lcc.py 
b/packages/graphrag/graphrag/graphs/stable_lcc.py new file mode 100644 index 0000000000..aa01044eaf --- /dev/null +++ b/packages/graphrag/graphrag/graphs/stable_lcc.py @@ -0,0 +1,75 @@ +# Copyright (c) 2024 Microsoft Corporation. +# Licensed under the MIT License + +"""Produce a stable largest connected component from a relationships DataFrame. + +"Stable" means the same input edge list always produces the same output, +regardless of the original row order. This is achieved by: + +1. Normalizing node names (HTML unescape, uppercase, strip). +2. Filtering to the largest connected component. +3. Sorting edges so the lesser node is always the source. +4. Deduplicating edges, then sorting them alphabetically for deterministic row order. +""" + +import html + +import pandas as pd + +from graphrag.graphs.connected_components import largest_connected_component + + +def stable_lcc( + relationships: pd.DataFrame, + source_column: str = "source", + target_column: str = "target", +) -> pd.DataFrame: + """Return the relationships DataFrame filtered to a stable largest connected component. + + Parameters + ---------- + relationships : pd.DataFrame + Edge list with at least source and target columns. + source_column : str + Name of the source node column. + target_column : str + Name of the target node column. + + Returns + ------- + pd.DataFrame + A copy of the input filtered to the LCC with normalized node names + and deterministic edge ordering. + """ + if relationships.empty: + return relationships.copy() + + # 1. Normalize node names + edges = relationships.copy() + edges[source_column] = edges[source_column].apply(_normalize_name) + edges[target_column] = edges[target_column].apply(_normalize_name) + + # 2. Filter to the largest connected component + lcc_nodes = largest_connected_component( + edges, source_column=source_column, target_column=target_column + ) + edges = edges[ + edges[source_column].isin(lcc_nodes) & edges[target_column].isin(lcc_nodes) + ] + + # 3.
Stabilize edge direction: lesser node always first + swapped = edges[source_column] > edges[target_column] + edges.loc[swapped, [source_column, target_column]] = edges.loc[ + swapped, [target_column, source_column] + ].to_numpy() + + # 4. Deduplicate edges that were reversed pairs in the original data + edges = edges.drop_duplicates(subset=[source_column, target_column]) + + # 5. Sort for deterministic order + return edges.sort_values([source_column, target_column]).reset_index(drop=True) + + +def _normalize_name(name: str) -> str: + """Normalize a node name: HTML unescape, uppercase, strip whitespace.""" + return html.unescape(name).upper().strip() diff --git a/packages/graphrag/graphrag/index/operations/build_noun_graph/build_noun_graph.py b/packages/graphrag/graphrag/index/operations/build_noun_graph/build_noun_graph.py index 32c2ef7b9d..3a9f3e8f3d 100644 --- a/packages/graphrag/graphrag/index/operations/build_noun_graph/build_noun_graph.py +++ b/packages/graphrag/graphrag/index/operations/build_noun_graph/build_noun_graph.py @@ -10,11 +10,11 @@ from graphrag_cache import Cache from graphrag.config.enums import AsyncType +from graphrag.graphs.edge_weights import calculate_pmi_edge_weights from graphrag.index.operations.build_noun_graph.np_extractors.base import ( BaseNounPhraseExtractor, ) from graphrag.index.utils.derive_from_rows import derive_from_rows -from graphrag.index.utils.graphs import calculate_pmi_edge_weights from graphrag.index.utils.hashing import gen_sha512_hash diff --git a/packages/graphrag/graphrag/index/operations/cluster_graph.py b/packages/graphrag/graphrag/index/operations/cluster_graph.py index ff4632ea17..745d6b5148 100644 --- a/packages/graphrag/graphrag/index/operations/cluster_graph.py +++ b/packages/graphrag/graphrag/index/operations/cluster_graph.py @@ -5,10 +5,10 @@ import logging -import networkx as nx +import pandas as pd -from graphrag.index.utils.graphs import hierarchical_leiden -from graphrag.index.utils.stable_lcc import 
stable_largest_connected_component +from graphrag.graphs.hierarchical_leiden import hierarchical_leiden +from graphrag.graphs.stable_lcc import stable_lcc Communities = list[tuple[int, int, int, list[str]]] @@ -17,18 +17,14 @@ def cluster_graph( - graph: nx.Graph, + edges: pd.DataFrame, max_cluster_size: int, use_lcc: bool, seed: int | None = None, ) -> Communities: - """Apply a hierarchical clustering algorithm to a graph.""" - if len(graph.nodes) == 0: - logger.warning("Graph has no nodes") - return [] - + """Apply a hierarchical clustering algorithm to a relationships DataFrame.""" node_id_to_community_map, parent_mapping = _compute_leiden_communities( - graph=graph, + edges=edges, max_cluster_size=max_cluster_size, use_lcc=use_lcc, seed=seed, @@ -55,17 +51,32 @@ def cluster_graph( # Taken from graph_intelligence & adapted def _compute_leiden_communities( - graph: nx.Graph | nx.DiGraph, + edges: pd.DataFrame, max_cluster_size: int, use_lcc: bool, seed: int | None = None, ) -> tuple[dict[int, dict[str, int]], dict[int, int]]: """Return Leiden root communities and their hierarchy mapping.""" + edge_df = edges.copy() + + # Normalize edge direction and deduplicate (undirected graph). + # NX deduplicates reversed pairs keeping the last row's attributes, + # so we replicate that by normalizing direction then keeping last. 
+ lo = edge_df[["source", "target"]].min(axis=1) + hi = edge_df[["source", "target"]].max(axis=1) + edge_df = edge_df.assign(source=lo, target=hi) + edge_df = edge_df.drop_duplicates(subset=["source", "target"], keep="last") + if use_lcc: - graph = stable_largest_connected_component(graph) + edge_df = stable_lcc(edge_df) + + edge_list: list[tuple[str, str, float]] = sorted( + (str(row["source"]), str(row["target"]), float(row.get("weight", 1.0))) + for _, row in edge_df.iterrows() + ) community_mapping = hierarchical_leiden( - graph, max_cluster_size=max_cluster_size, random_seed=seed + edge_list, max_cluster_size=max_cluster_size, random_seed=seed ) results: dict[int, dict[str, int]] = {} hierarchy: dict[int, int] = {} diff --git a/packages/graphrag/graphrag/index/operations/compute_degree.py b/packages/graphrag/graphrag/index/operations/compute_degree.py deleted file mode 100644 index 8de95ad2e2..0000000000 --- a/packages/graphrag/graphrag/index/operations/compute_degree.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2024 Microsoft Corporation. -# Licensed under the MIT License - -"""A module containing compute_degree method definition.""" - -import networkx as nx -import pandas as pd - - -def compute_degree(graph: nx.Graph) -> pd.DataFrame: - """Create a new DataFrame with the degree of each node in the graph.""" - return pd.DataFrame([ - {"title": node, "degree": int(degree)} - for node, degree in graph.degree # type: ignore - ]) diff --git a/packages/graphrag/graphrag/index/operations/create_graph.py b/packages/graphrag/graphrag/index/operations/create_graph.py deleted file mode 100644 index 54b63b70aa..0000000000 --- a/packages/graphrag/graphrag/index/operations/create_graph.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) 2024 Microsoft Corporation. 
-# Licensed under the MIT License - -"""A module containing create_graph definition.""" - -import networkx as nx -import pandas as pd - - -def create_graph( - edges: pd.DataFrame, - edge_attr: list[str | int] | None = None, - nodes: pd.DataFrame | None = None, - node_id: str = "title", -) -> nx.Graph: - """Create a networkx graph from nodes and edges dataframes.""" - graph = nx.from_pandas_edgelist(edges, edge_attr=edge_attr) - - if nodes is not None: - nodes.set_index(node_id, inplace=True) - graph.add_nodes_from((n, dict(d)) for n, d in nodes.iterrows()) - - return graph diff --git a/packages/graphrag/graphrag/index/operations/finalize_entities.py b/packages/graphrag/graphrag/index/operations/finalize_entities.py index 28ac2a55f0..71d6acc536 100644 --- a/packages/graphrag/graphrag/index/operations/finalize_entities.py +++ b/packages/graphrag/graphrag/index/operations/finalize_entities.py @@ -8,8 +8,7 @@ import pandas as pd from graphrag.data_model.schemas import ENTITIES_FINAL_COLUMNS -from graphrag.index.operations.compute_degree import compute_degree -from graphrag.index.operations.create_graph import create_graph +from graphrag.graphs.compute_degree import compute_degree def finalize_entities( @@ -17,8 +16,7 @@ def finalize_entities( relationships: pd.DataFrame, ) -> pd.DataFrame: """All the steps to transform final entities.""" - graph = create_graph(relationships, edge_attr=["weight"]) - degrees = compute_degree(graph) + degrees = compute_degree(relationships) final_entities = entities.merge(degrees, on="title", how="left").drop_duplicates( subset="title" ) diff --git a/packages/graphrag/graphrag/index/operations/finalize_relationships.py b/packages/graphrag/graphrag/index/operations/finalize_relationships.py index 21ba413667..d4d14d097f 100644 --- a/packages/graphrag/graphrag/index/operations/finalize_relationships.py +++ b/packages/graphrag/graphrag/index/operations/finalize_relationships.py @@ -8,19 +8,17 @@ import pandas as pd from 
graphrag.data_model.schemas import RELATIONSHIPS_FINAL_COLUMNS -from graphrag.index.operations.compute_degree import compute_degree +from graphrag.graphs.compute_degree import compute_degree from graphrag.index.operations.compute_edge_combined_degree import ( compute_edge_combined_degree, ) -from graphrag.index.operations.create_graph import create_graph def finalize_relationships( relationships: pd.DataFrame, ) -> pd.DataFrame: """All the steps to transform final relationships.""" - graph = create_graph(relationships, edge_attr=["weight"]) - degrees = compute_degree(graph) + degrees = compute_degree(relationships) final_relationships = relationships.drop_duplicates(subset=["source", "target"]) final_relationships["combined_degree"] = compute_edge_combined_degree( diff --git a/packages/graphrag/graphrag/index/operations/graph_to_dataframes.py b/packages/graphrag/graphrag/index/operations/graph_to_dataframes.py deleted file mode 100644 index 632eb12586..0000000000 --- a/packages/graphrag/graphrag/index/operations/graph_to_dataframes.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2024 Microsoft Corporation. 
-# Licensed under the MIT License - -"""A module containing graph_to_dataframes method definition.""" - -import networkx as nx -import pandas as pd - - -def graph_to_dataframes( - graph: nx.Graph, - node_columns: list[str] | None = None, - edge_columns: list[str] | None = None, - node_id: str = "title", -) -> tuple[pd.DataFrame, pd.DataFrame]: - """Deconstructs an nx.Graph into nodes and edges dataframes.""" - # nx graph nodes are a tuple, and creating a df from them results in the id being the index - nodes = pd.DataFrame.from_dict(dict(graph.nodes(data=True)), orient="index") - nodes[node_id] = nodes.index - nodes.reset_index(inplace=True, drop=True) - - edges = nx.to_pandas_edgelist(graph) - - # we don't deal in directed graphs, but we do need to ensure consistent ordering for df joins - # nx loses the initial ordering - edges["min_source"] = edges[["source", "target"]].min(axis=1) - edges["max_target"] = edges[["source", "target"]].max(axis=1) - edges = edges.drop(columns=["source", "target"]).rename( - columns={"min_source": "source", "max_target": "target"} # type: ignore - ) - - if node_columns: - nodes = nodes.loc[:, node_columns] - - if edge_columns: - edges = edges.loc[:, edge_columns] - - return (nodes, edges) diff --git a/packages/graphrag/graphrag/index/operations/prune_graph.py b/packages/graphrag/graphrag/index/operations/prune_graph.py index 11a5335b4e..a2f715bb9f 100644 --- a/packages/graphrag/graphrag/index/operations/prune_graph.py +++ b/packages/graphrag/graphrag/index/operations/prune_graph.py @@ -3,20 +3,17 @@ """Graph pruning.""" -from typing import TYPE_CHECKING, cast - -import networkx as nx import numpy as np +import pandas as pd import graphrag.data_model.schemas as schemas -from graphrag.index.utils.graphs import largest_connected_component - -if TYPE_CHECKING: - from networkx.classes.reportviews import DegreeView +from graphrag.graphs.compute_degree import compute_degree +from graphrag.graphs.connected_components import 
largest_connected_component def prune_graph( - graph: nx.Graph, + entities: pd.DataFrame, + relationships: pd.DataFrame, min_node_freq: int = 1, max_node_freq_std: float | None = None, min_node_degree: int = 1, @@ -24,67 +21,89 @@ def prune_graph( min_edge_weight_pct: float = 40, remove_ego_nodes: bool = False, lcc_only: bool = False, -) -> nx.Graph: - """Prune graph by removing nodes that are out of frequency/degree ranges and edges with low weights.""" - # remove ego nodes if needed - degree = cast("DegreeView", graph.degree) - degrees = list(degree()) # type: ignore - if remove_ego_nodes: - # ego node is one with highest degree - ego_node = max(degrees, key=lambda x: x[1]) - graph.remove_nodes_from([ego_node[0]]) - - # remove nodes that are not within the predefined degree range - graph.remove_nodes_from([ - node for node, degree in degrees if degree < min_node_degree - ]) - if max_node_degree_std is not None: - upper_threshold = _get_upper_threshold_by_std( - [degree for _, degree in degrees], max_node_degree_std - ) - graph.remove_nodes_from([ - node for node, degree in degrees if degree > upper_threshold - ]) - - # remove nodes that are not within the predefined frequency range - graph.remove_nodes_from([ - node - for node, data in graph.nodes(data=True) - if schemas.NODE_FREQUENCY not in data - or data[schemas.NODE_FREQUENCY] < min_node_freq - ]) - if max_node_freq_std is not None: - upper_threshold = _get_upper_threshold_by_std( - [data[schemas.NODE_FREQUENCY] for _, data in graph.nodes(data=True)], - max_node_freq_std, - ) - graph.remove_nodes_from([ - node - for node, data in graph.nodes(data=True) - if data[schemas.NODE_FREQUENCY] > upper_threshold - ]) - - # remove edges by min weight - if len(graph.edges) == 0: - return graph - - if min_edge_weight_pct > 0: - min_edge_weight = np.percentile( - [data[schemas.EDGE_WEIGHT] for _, _, data in graph.edges(data=True)], - min_edge_weight_pct, +) -> tuple[pd.DataFrame, pd.DataFrame]: + """Prune graph by 
removing out-of-range nodes and low-weight edges. + + Returns the pruned *entities* and *relationships* DataFrames. + """ + # -- Compute degrees from the original edge list -------------------------- + degree_df = compute_degree(relationships) + degree_map: dict[str, int] = dict( + zip(degree_df["title"], degree_df["degree"], strict=True) + ) + + # Entity-only nodes (isolated, degree 0) must also be present so that + # degree thresholds are computed over the same population as before. + entity_titles: set[str] = set(entities["title"]) + for t in entity_titles: + degree_map.setdefault(t, 0) + + degree_values = list(degree_map.values()) + nodes_to_remove: set[str] = set() + + # -- Ego node removal (highest degree) ------------------------------------ + if remove_ego_nodes and degree_map: + ego_node = max(degree_map, key=lambda n: degree_map[n]) + nodes_to_remove.add(ego_node) + + # -- Degree-based removal ------------------------------------------------- + for node, deg in degree_map.items(): + if deg < min_node_degree: + nodes_to_remove.add(node) + + if max_node_degree_std is not None and degree_values: + upper = _get_upper_threshold_by_std(degree_values, max_node_degree_std) + for node, deg in degree_map.items(): + if deg > upper: + nodes_to_remove.add(node) + + # -- Apply degree removals before frequency filtering --------------------- + # NX mutates sequentially, so frequency thresholds are computed over the + # set of entity nodes that survived degree-based removal. 
+ remaining = entities[~entities["title"].isin(nodes_to_remove)] + + # -- Frequency-based removal ---------------------------------------------- + freq_col = schemas.NODE_FREQUENCY + if freq_col in remaining.columns: + low_freq = remaining.loc[remaining[freq_col] < min_node_freq, "title"] + nodes_to_remove.update(low_freq) + remaining = remaining[~remaining["title"].isin(nodes_to_remove)] + + if max_node_freq_std is not None and len(remaining) > 0: + freq_values = remaining[freq_col].tolist() + upper = _get_upper_threshold_by_std(freq_values, max_node_freq_std) + high_freq = remaining.loc[remaining[freq_col] > upper, "title"] + nodes_to_remove.update(high_freq) + + # -- Filter to surviving entity nodes ------------------------------------- + kept_titles = entity_titles - nodes_to_remove + pruned_entities = entities[entities["title"].isin(kept_titles)] + pruned_rels = relationships[ + relationships["source"].isin(kept_titles) + & relationships["target"].isin(kept_titles) + ] + + # -- Edge weight filtering ------------------------------------------------ + if ( + len(pruned_rels) > 0 + and min_edge_weight_pct > 0 + and schemas.EDGE_WEIGHT in pruned_rels.columns + ): + min_weight = np.percentile( + pruned_rels[schemas.EDGE_WEIGHT].to_numpy(), min_edge_weight_pct ) - graph.remove_edges_from([ - (source, target) - for source, target, data in graph.edges(data=True) - if source in graph.nodes() - and target in graph.nodes() - and data[schemas.EDGE_WEIGHT] < min_edge_weight - ]) - - if lcc_only: - return largest_connected_component(graph) - - return graph + pruned_rels = pruned_rels[pruned_rels[schemas.EDGE_WEIGHT] >= min_weight] + + # -- LCC ------------------------------------------------------------------ + if lcc_only and len(pruned_rels) > 0: + lcc_nodes = largest_connected_component(pruned_rels) + pruned_entities = pruned_entities[pruned_entities["title"].isin(lcc_nodes)] + pruned_rels = pruned_rels[ + pruned_rels["source"].isin(lcc_nodes) + & 
pruned_rels["target"].isin(lcc_nodes) + ] + + return pruned_entities.reset_index(drop=True), pruned_rels.reset_index(drop=True) def _get_upper_threshold_by_std( diff --git a/packages/graphrag/graphrag/index/operations/snapshot_graphml.py b/packages/graphrag/graphrag/index/operations/snapshot_graphml.py index 9124038401..16b2d1d574 100644 --- a/packages/graphrag/graphrag/index/operations/snapshot_graphml.py +++ b/packages/graphrag/graphrag/index/operations/snapshot_graphml.py @@ -4,14 +4,16 @@ """A module containing snapshot_graphml method definition.""" import networkx as nx +import pandas as pd from graphrag_storage import Storage async def snapshot_graphml( - input: str | nx.Graph, + edges: pd.DataFrame, name: str, storage: Storage, ) -> None: """Take a entire snapshot of a graph to standard graphml format.""" - graphml = input if isinstance(input, str) else "\n".join(nx.generate_graphml(input)) + graph = nx.from_pandas_edgelist(edges, edge_attr=["weight"]) + graphml = "\n".join(nx.generate_graphml(graph)) await storage.set(name + ".graphml", graphml) diff --git a/packages/graphrag/graphrag/index/utils/graphs.py b/packages/graphrag/graphrag/index/utils/graphs.py deleted file mode 100644 index 02f2693cbe..0000000000 --- a/packages/graphrag/graphrag/index/utils/graphs.py +++ /dev/null @@ -1,386 +0,0 @@ -# Copyright (c) 2024 Microsoft Corporation. -# Licensed under the MIT License - -""" -Collection of graph utility functions. - -These are largely copies/re-implementations of graspologic methods to avoid dependency issues. 
-""" - -import logging -import math -from collections import defaultdict -from typing import Any, cast - -import graspologic_native as gn -import networkx as nx -import numpy as np -import pandas as pd - -from graphrag.config.enums import ModularityMetric - -logger = logging.getLogger(__name__) - - -def largest_connected_component(graph: nx.Graph) -> nx.Graph: - """Return the largest connected component of the graph.""" - graph = graph.copy() - lcc_nodes = max(nx.connected_components(graph), key=len) - lcc = graph.subgraph(lcc_nodes).copy() - lcc.remove_nodes_from([n for n in lcc if n not in lcc_nodes]) - return cast("nx.Graph", lcc) - - -def _nx_to_edge_list( - graph: nx.Graph, - weight_attribute: str = "weight", - weight_default: float = 1.0, -) -> list[tuple[str, str, float]]: - """ - Convert an undirected, non-multigraph networkx graph to a list of edges. - - Each edge is represented as a tuple of (source_str, target_str, weight). - """ - edge_list: list[tuple[str, str, float]] = [] - - # Decide how to retrieve the weight data - edge_iter = graph.edges(data=weight_attribute, default=weight_default) # type: ignore - - for source, target, weight in edge_iter: - source_str = str(source) - target_str = str(target) - edge_list.append((source_str, target_str, float(weight))) - - return edge_list - - -def hierarchical_leiden( - graph: nx.Graph, - max_cluster_size: int = 10, - random_seed: int | None = 0xDEADBEEF, -) -> list[gn.HierarchicalCluster]: - """Run hierarchical leiden on the graph.""" - return gn.hierarchical_leiden( - edges=_nx_to_edge_list(graph), - max_cluster_size=max_cluster_size, - seed=random_seed, - starting_communities=None, - resolution=1.0, - randomness=0.001, - use_modularity=True, - iterations=1, - ) - - -def modularity( - graph: nx.Graph, - partitions: dict[Any, int], - weight_attribute: str = "weight", - resolution: float = 1.0, -) -> float: - """Given an undirected graph and a dictionary of vertices to community ids, calculate the 
modularity.""" - components = _modularity_components(graph, partitions, weight_attribute, resolution) - return sum(components.values()) - - -def _modularity_component( - intra_community_degree: float, - total_community_degree: float, - network_degree_sum: float, - resolution: float, -) -> float: - community_degree_ratio = math.pow(total_community_degree, 2.0) / ( - 2.0 * network_degree_sum - ) - return (intra_community_degree - resolution * community_degree_ratio) / ( - 2.0 * network_degree_sum - ) - - -def _modularity_components( - graph: nx.Graph, - partitions: dict[Any, int], - weight_attribute: str = "weight", - resolution: float = 1.0, -) -> dict[int, float]: - total_edge_weight = 0.0 - communities = set(partitions.values()) - - degree_sums_within_community: dict[int, float] = defaultdict(lambda: 0.0) - degree_sums_for_community: dict[int, float] = defaultdict(lambda: 0.0) - for vertex, neighbor_vertex, weight in graph.edges(data=weight_attribute): - vertex_community = partitions[vertex] - neighbor_community = partitions[neighbor_vertex] - if vertex_community == neighbor_community: - if vertex == neighbor_vertex: - degree_sums_within_community[vertex_community] += weight - else: - degree_sums_within_community[vertex_community] += weight * 2.0 - degree_sums_for_community[vertex_community] += weight - degree_sums_for_community[neighbor_community] += weight - total_edge_weight += weight - - return { - comm: _modularity_component( - degree_sums_within_community[comm], - degree_sums_for_community[comm], - total_edge_weight, - resolution, - ) - for comm in communities - } - - -def calculate_root_modularity( - graph: nx.Graph, - max_cluster_size: int = 10, - random_seed: int = 0xDEADBEEF, -) -> float: - """Calculate distance between the modularity of the graph's root clusters and the target modularity.""" - hcs = hierarchical_leiden( - graph, max_cluster_size=max_cluster_size, random_seed=random_seed - ) - root_clusters = first_level_hierarchical_clustering(hcs) - 
return modularity(graph, root_clusters) - - -def calculate_leaf_modularity( - graph: nx.Graph, - max_cluster_size: int = 10, - random_seed: int = 0xDEADBEEF, -) -> float: - """Calculate distance between the modularity of the graph's leaf clusters and the target modularity.""" - hcs = hierarchical_leiden( - graph, max_cluster_size=max_cluster_size, random_seed=random_seed - ) - leaf_clusters = final_level_hierarchical_clustering(hcs) - return modularity(graph, leaf_clusters) - - -def calculate_graph_modularity( - graph: nx.Graph, - max_cluster_size: int = 10, - random_seed: int = 0xDEADBEEF, - use_root_modularity: bool = True, -) -> float: - """Calculate modularity of the whole graph.""" - if use_root_modularity: - return calculate_root_modularity( - graph, max_cluster_size=max_cluster_size, random_seed=random_seed - ) - return calculate_leaf_modularity( - graph, max_cluster_size=max_cluster_size, random_seed=random_seed - ) - - -def calculate_lcc_modularity( - graph: nx.Graph, - max_cluster_size: int = 10, - random_seed: int = 0xDEADBEEF, - use_root_modularity: bool = True, -) -> float: - """Calculate modularity of the largest connected component of the graph.""" - lcc = cast("nx.Graph", largest_connected_component(graph)) - if use_root_modularity: - return calculate_root_modularity( - lcc, max_cluster_size=max_cluster_size, random_seed=random_seed - ) - return calculate_leaf_modularity( - lcc, max_cluster_size=max_cluster_size, random_seed=random_seed - ) - - -def calculate_weighted_modularity( - graph: nx.Graph, - max_cluster_size: int = 10, - random_seed: int = 0xDEADBEEF, - min_connected_component_size: int = 10, - use_root_modularity: bool = True, -) -> float: - """ - Calculate weighted modularity of all connected components with size greater than min_connected_component_size. - - Modularity = sum(component_modularity * component_size) / total_nodes. 
- """ - connected_components: list[set] = list(nx.connected_components(graph)) - filtered_components = [ - component - for component in connected_components - if len(component) > min_connected_component_size - ] - if len(filtered_components) == 0: - filtered_components = [graph] - - total_nodes = sum(len(component) for component in filtered_components) - total_modularity = 0 - for component in filtered_components: - if len(component) > min_connected_component_size: - subgraph = graph.subgraph(component) - if use_root_modularity: - modularity = calculate_root_modularity( - subgraph, max_cluster_size=max_cluster_size, random_seed=random_seed - ) - else: - modularity = calculate_leaf_modularity( - subgraph, max_cluster_size=max_cluster_size, random_seed=random_seed - ) - total_modularity += modularity * len(component) / total_nodes - return total_modularity - - -def calculate_modularity( - graph: nx.Graph, - max_cluster_size: int = 10, - random_seed: int = 0xDEADBEEF, - use_root_modularity: bool = True, - modularity_metric: ModularityMetric = ModularityMetric.WeightedComponents, -) -> float: - """Calculate modularity of the graph based on the modularity metric type.""" - match modularity_metric: - case ModularityMetric.Graph: - logger.info("Calculating graph modularity") - return calculate_graph_modularity( - graph, - max_cluster_size=max_cluster_size, - random_seed=random_seed, - use_root_modularity=use_root_modularity, - ) - case ModularityMetric.LCC: - logger.info("Calculating LCC modularity") - return calculate_lcc_modularity( - graph, - max_cluster_size=max_cluster_size, - random_seed=random_seed, - use_root_modularity=use_root_modularity, - ) - case ModularityMetric.WeightedComponents: - logger.info("Calculating weighted-components modularity") - return calculate_weighted_modularity( - graph, - max_cluster_size=max_cluster_size, - random_seed=random_seed, - use_root_modularity=use_root_modularity, - ) - - -def calculate_pmi_edge_weights( - nodes_df: 
pd.DataFrame, - edges_df: pd.DataFrame, - node_name_col: str = "title", - node_freq_col: str = "frequency", - edge_weight_col: str = "weight", - edge_source_col: str = "source", - edge_target_col: str = "target", -) -> pd.DataFrame: - """ - Calculate pointwise mutual information (PMI) edge weights. - - Uses a variant of PMI that accounts for bias towards low-frequency events. - pmi(x,y) = p(x,y) * log2(p(x,y)/ (p(x)*p(y)) - p(x,y) = edge_weight(x,y) / total_edge_weights - p(x) = freq_occurrence(x) / total_freq_occurrences. - - """ - copied_nodes_df = nodes_df[[node_name_col, node_freq_col]] - - total_edge_weights = edges_df[edge_weight_col].sum() - total_freq_occurrences = nodes_df[node_freq_col].sum() - copied_nodes_df["prop_occurrence"] = ( - copied_nodes_df[node_freq_col] / total_freq_occurrences - ) - copied_nodes_df = copied_nodes_df.loc[:, [node_name_col, "prop_occurrence"]] - - edges_df["prop_weight"] = edges_df[edge_weight_col] / total_edge_weights - edges_df = ( - edges_df - .merge( - copied_nodes_df, left_on=edge_source_col, right_on=node_name_col, how="left" - ) - .drop(columns=[node_name_col]) - .rename(columns={"prop_occurrence": "source_prop"}) - ) - edges_df = ( - edges_df - .merge( - copied_nodes_df, left_on=edge_target_col, right_on=node_name_col, how="left" - ) - .drop(columns=[node_name_col]) - .rename(columns={"prop_occurrence": "target_prop"}) - ) - edges_df[edge_weight_col] = edges_df["prop_weight"] * np.log2( - edges_df["prop_weight"] / (edges_df["source_prop"] * edges_df["target_prop"]) - ) - - return edges_df.drop(columns=["prop_weight", "source_prop", "target_prop"]) - - -def calculate_rrf_edge_weights( - nodes_df: pd.DataFrame, - edges_df: pd.DataFrame, - node_name_col="title", - node_freq_col="freq", - edge_weight_col="weight", - edge_source_col="source", - edge_target_col="target", - rrf_smoothing_factor: int = 60, -) -> pd.DataFrame: - """Calculate reciprocal rank fusion (RRF) edge weights as a combination of PMI weight and combined 
freq of source and target.""" - edges_df = calculate_pmi_edge_weights( - nodes_df, - edges_df, - node_name_col, - node_freq_col, - edge_weight_col, - edge_source_col, - edge_target_col, - ) - - edges_df["pmi_rank"] = edges_df[edge_weight_col].rank(method="min", ascending=False) - edges_df["raw_weight_rank"] = edges_df[edge_weight_col].rank( - method="min", ascending=False - ) - edges_df[edge_weight_col] = edges_df.apply( - lambda x: ( - (1 / (rrf_smoothing_factor + x["pmi_rank"])) - + (1 / (rrf_smoothing_factor + x["raw_weight_rank"])) - ), - axis=1, - ) - - return edges_df.drop(columns=["pmi_rank", "raw_weight_rank"]) - - -def get_upper_threshold_by_std(data: list[float] | list[int], std_trim: float) -> float: - """Get upper threshold by standard deviation.""" - mean = np.mean(data) - std = np.std(data) - return cast("float", mean + std_trim * std) - - -def first_level_hierarchical_clustering( - hcs: list[gn.HierarchicalCluster], -) -> dict[Any, int]: - """first_level_hierarchical_clustering. - - Returns - ------- - dict[Any, int] - The initial leiden algorithm clustering results as a dictionary - of node id to community id. - """ - return {entry.node: entry.cluster for entry in hcs if entry.level == 0} - - -def final_level_hierarchical_clustering( - hcs: list[gn.HierarchicalCluster], -) -> dict[Any, int]: - """ - final_level_hierarchical_clustering. - - Returns - ------- - dict[Any, int] - The last leiden algorithm clustering results as a dictionary - of node id to community id. 
- """ - return {entry.node: entry.cluster for entry in hcs if entry.is_final_cluster} diff --git a/packages/graphrag/graphrag/index/workflows/create_communities.py b/packages/graphrag/graphrag/index/workflows/create_communities.py index a9c4fb2054..28ef3f5421 100644 --- a/packages/graphrag/graphrag/index/workflows/create_communities.py +++ b/packages/graphrag/graphrag/index/workflows/create_communities.py @@ -15,7 +15,6 @@ from graphrag.data_model.data_reader import DataReader from graphrag.data_model.schemas import COMMUNITIES_FINAL_COLUMNS from graphrag.index.operations.cluster_graph import cluster_graph -from graphrag.index.operations.create_graph import create_graph from graphrag.index.typing.context import PipelineRunContext from graphrag.index.typing.workflow import WorkflowFunctionOutput @@ -58,10 +57,8 @@ def create_communities( seed: int | None = None, ) -> pd.DataFrame: """All the steps to transform final communities.""" - graph = create_graph(relationships, edge_attr=["weight"]) - clusters = cluster_graph( - graph, + relationships, max_cluster_size, use_lcc, seed=seed, diff --git a/packages/graphrag/graphrag/index/workflows/finalize_graph.py b/packages/graphrag/graphrag/index/workflows/finalize_graph.py index 31fc9fddd4..395299eaa7 100644 --- a/packages/graphrag/graphrag/index/workflows/finalize_graph.py +++ b/packages/graphrag/graphrag/index/workflows/finalize_graph.py @@ -9,7 +9,6 @@ from graphrag.config.models.graph_rag_config import GraphRagConfig from graphrag.data_model.data_reader import DataReader -from graphrag.index.operations.create_graph import create_graph from graphrag.index.operations.finalize_entities import finalize_entities from graphrag.index.operations.finalize_relationships import finalize_relationships from graphrag.index.operations.snapshot_graphml import snapshot_graphml @@ -40,10 +39,8 @@ async def run_workflow( ) if config.snapshots.graphml: - graph = create_graph(final_relationships, edge_attr=["weight"]) - await 
snapshot_graphml( - graph, + final_relationships, name="graph", storage=context.output_storage, ) diff --git a/packages/graphrag/graphrag/index/workflows/prune_graph.py b/packages/graphrag/graphrag/index/workflows/prune_graph.py index f3720fd1ee..92edc01541 100644 --- a/packages/graphrag/graphrag/index/workflows/prune_graph.py +++ b/packages/graphrag/graphrag/index/workflows/prune_graph.py @@ -10,8 +10,6 @@ from graphrag.config.models.graph_rag_config import GraphRagConfig from graphrag.config.models.prune_graph_config import PruneGraphConfig from graphrag.data_model.data_reader import DataReader -from graphrag.index.operations.create_graph import create_graph -from graphrag.index.operations.graph_to_dataframes import graph_to_dataframes from graphrag.index.operations.prune_graph import prune_graph as prune_graph_operation from graphrag.index.typing.context import PipelineRunContext from graphrag.index.typing.workflow import WorkflowFunctionOutput @@ -55,11 +53,9 @@ def prune_graph( pruning_config: PruneGraphConfig, ) -> tuple[pd.DataFrame, pd.DataFrame]: """Prune a full graph based on graph statistics.""" - # create a temporary graph to prune, then turn it back into dataframes - graph = create_graph(relationships, edge_attr=["weight"], nodes=entities) - - pruned = prune_graph_operation( - graph, + pruned_entities, pruned_relationships = prune_graph_operation( + entities, + relationships, min_node_freq=pruning_config.min_node_freq, max_node_freq_std=pruning_config.max_node_freq_std, min_node_degree=pruning_config.min_node_degree, @@ -69,24 +65,14 @@ def prune_graph( lcc_only=pruning_config.lcc_only, ) - if len(pruned.nodes) == 0: + if len(pruned_entities) == 0: error_msg = "Graph Pruning failed. No entities remain." logger.error(error_msg) raise ValueError(error_msg) - if len(pruned.edges) == 0: + if len(pruned_relationships) == 0: error_msg = "Graph Pruning failed. No relationships remain." 
logger.error(error_msg) raise ValueError(error_msg) - pruned_nodes, pruned_edges = graph_to_dataframes( - pruned, node_columns=["title"], edge_columns=["source", "target"] - ) - - # subset the full nodes and edges to only include the pruned remainders - subset_entities = pruned_nodes.merge(entities, on="title", how="inner") - subset_relationships = pruned_edges.merge( - relationships, on=["source", "target"], how="inner" - ) - - return (subset_entities, subset_relationships) + return (pruned_entities, pruned_relationships) diff --git a/tests/unit/graphs/__init__.py b/tests/unit/graphs/__init__.py new file mode 100644 index 0000000000..98c1984e76 --- /dev/null +++ b/tests/unit/graphs/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2024 Microsoft Corporation. +# Licensed under the MIT License + +"""Graph utility tests""" diff --git a/tests/unit/graphs/fixtures/graph.json b/tests/unit/graphs/fixtures/graph.json new file mode 100644 index 0000000000..378defb055 --- /dev/null +++ b/tests/unit/graphs/fixtures/graph.json @@ -0,0 +1,7012 @@ +{ + "nodes": [ + { + "title": "PROJECT GUTENBERG", + "type": "ORGANIZATION" + }, + { + "title": "CHARLES DICKENS", + "type": "PERSON" + }, + { + "title": "ARTHUR RACKHAM", + "type": "PERSON" + }, + { + "title": "J. B. LIPPINCOTT COMPANY", + "type": "ORGANIZATION" + }, + { + "title": "UNITED STATES", + "type": "GEO" + }, + { + "title": "PHILADELPHIA", + "type": "GEO" + }, + { + "title": "NEW YORK", + "type": "GEO" + }, + { + "title": "A CHRISTMAS CAROL", + "type": "EVENT" + }, + { + "title": "BOB CRATCHIT", + "type": "PERSON" + }, + { + "title": "PETER CRATCHIT", + "type": "PERSON" + }, + { + "title": "TIM CRATCHIT", + "type": "PERSON" + }, + { + "title": "MR. 
FEZZIWIG", + "type": "PERSON" + }, + { + "title": "FRED", + "type": "PERSON" + }, + { + "title": "GHOST OF CHRISTMAS PAST", + "type": "PERSON" + }, + { + "title": "GHOST OF CHRISTMAS PRESENT", + "type": "PERSON" + }, + { + "title": "GHOST OF CHRISTMAS YET TO COME", + "type": "PERSON" + }, + { + "title": "GHOST OF JACOB MARLEY", + "type": "PERSON" + }, + { + "title": "JOE", + "type": "PERSON" + }, + { + "title": "EBENEZER SCROOGE", + "type": "PERSON" + }, + { + "title": "SCROOGE AND MARLEY", + "type": "ORGANIZATION" + }, + { + "title": "MR. TOPPER", + "type": "PERSON" + }, + { + "title": "DICK WILKINS", + "type": "PERSON" + }, + { + "title": "BELLE", + "type": "PERSON" + }, + { + "title": "CAROLINE", + "type": "PERSON" + }, + { + "title": "MRS. CRATCHIT", + "type": "PERSON" + }, + { + "title": "BELINDA CRATCHIT", + "type": "PERSON" + }, + { + "title": "MARTHA CRATCHIT", + "type": "PERSON" + }, + { + "title": "MRS. DILBER", + "type": "PERSON" + }, + { + "title": "FAN", + "type": "PERSON" + }, + { + "title": "MRS. FEZZIWIG", + "type": "PERSON" + }, + { + "title": "JANET BLENKINSHIP", + "type": "PERSON" + }, + { + "title": "ONLINE DISTRIBUTED PROOFREADING TEAM", + "type": "ORGANIZATION" + }, + { + "title": "GREAT BRITAIN", + "type": "GEO" + }, + { + "title": "MARLEY'S GHOST", + "type": "EVENT" + }, + { + "title": "THE FIRST OF THE THREE SPIRITS", + "type": "EVENT" + }, + { + "title": "THE SECOND OF THE THREE SPIRITS", + "type": "EVENT" + }, + { + "title": "THE LAST OF THE SPIRITS", + "type": "EVENT" + }, + { + "title": "THE END OF IT", + "type": "EVENT" + }, + { + "title": "MARLEY", + "type": "PERSON" + }, + { + "title": "SCROOGE", + "type": "PERSON" + }, + { + "title": "ST. 
PAUL'S CHURCHYARD", + "type": "GEO" + }, + { + "title": "MARLEY'S FUNERAL", + "type": "EVENT" + }, + { + "title": "FEZZIWIG", + "type": "PERSON" + }, + { + "title": "TIM", + "type": "PERSON" + }, + { + "title": "THE CLERGYMAN", + "type": "PERSON" + }, + { + "title": "THE CLERK", + "type": "PERSON" + }, + { + "title": "THE UNDERTAKER", + "type": "PERSON" + }, + { + "title": "THE CHIEF MOURNER", + "type": "PERSON" + }, + { + "title": "THE WOMAN", + "type": "PERSON" + }, + { + "title": "OLD SCRATCH", + "type": "PERSON" + }, + { + "title": "LONDON", + "type": "GEO" + }, + { + "title": "CHRISTMAS", + "type": "EVENT" + }, + { + "title": "SCROOGE'S FUNERAL", + "type": "EVENT" + }, + { + "title": "SCROOGE'S NEPHEW", + "type": "PERSON" + }, + { + "title": "SCROOGE'S CLERK", + "type": "PERSON" + }, + { + "title": "COUNTING-HOUSE", + "type": "ORGANIZATION" + }, + { + "title": "THE CITY", + "type": "GEO" + }, + { + "title": "CHRISTMAS EVE", + "type": "EVENT" + }, + { + "title": "BLIND MEN'S DOGS", + "type": "PERSON" + }, + { + "title": "THE COURT", + "type": "GEO" + }, + { + "title": "NEIGHBOURING OFFICES", + "type": "ORGANIZATION" + }, + { + "title": "THE HOUSES OPPOSITE", + "type": "ORGANIZATION" + }, + { + "title": "NATURE", + "type": "PERSON" + }, + { + "title": "JACOB MARLEY", + "type": "PERSON" + }, + { + "title": "PORTLY GENTLEMEN", + "type": "PERSON" + }, + { + "title": "NEW YEAR", + "type": "EVENT" + }, + { + "title": "BEDLAM", + "type": "GEO" + }, + { + "title": "SCROOGE'S WIFE", + "type": "PERSON" + }, + { + "title": "SCROOGE'S FAMILY", + "type": "PERSON" + }, + { + "title": "PARLIAMENT", + "type": "ORGANIZATION" + }, + { + "title": "CREDENTIALS", + "type": "EVENT" + }, + { + "title": "POOR AND DESTITUTE", + "type": "PERSON" + }, + { + "title": "FESTIVE SEASON", + "type": "EVENT" + }, + { + "title": "GENTLEMAN", + "type": "PERSON" + }, + { + "title": "LORD MAYOR", + "type": "PERSON" + }, + { + "title": "TAILOR", + "type": "PERSON" + }, + { + "title": "ST. 
DUNSTAN", + "type": "PERSON" + }, + { + "title": "EVIL SPIRIT", + "type": "PERSON" + }, + { + "title": "SINGER", + "type": "PERSON" + }, + { + "title": "CLERK", + "type": "PERSON" + }, + { + "title": "UNION WORKHOUSES", + "type": "ORGANIZATION" + }, + { + "title": "PRISONS", + "type": "ORGANIZATION" + }, + { + "title": "TREADMILL", + "type": "ORGANIZATION" + }, + { + "title": "POOR LAW", + "type": "ORGANIZATION" + }, + { + "title": "MANSION HOUSE", + "type": "ORGANIZATION" + }, + { + "title": "CHURCH", + "type": "ORGANIZATION" + }, + { + "title": "MAIN STREET", + "type": "GEO" + }, + { + "title": "COURT", + "type": "GEO" + }, + { + "title": "GARRET", + "type": "GEO" + }, + { + "title": "CHRISTMAS CAROL", + "type": "EVENT" + }, + { + "title": "DESTIUTE", + "type": "PERSON" + }, + { + "title": "LABOURERS", + "type": "PERSON" + }, + { + "title": "RAGGED MEN AND BOYS", + "type": "PERSON" + }, + { + "title": "PEOPLE", + "type": "PERSON" + }, + { + "title": "FIFTY COOKS AND BUTLERS", + "type": "PERSON" + }, + { + "title": "TAILOR'S WIFE", + "type": "PERSON" + }, + { + "title": "BABY", + "type": "PERSON" + }, + { + "title": "SHOPS", + "type": "ORGANIZATION" + }, + { + "title": "POULTERERS", + "type": "ORGANIZATION" + }, + { + "title": "GROCERS", + "type": "ORGANIZATION" + }, + { + "title": "ESTABLISHMENTS", + "type": "ORGANIZATION" + }, + { + "title": "CITY", + "type": "GEO" + }, + { + "title": "MONDAY", + "type": "EVENT" + }, + { + "title": "PREVIOUS MONDAY", + "type": "EVENT" + }, + { + "title": "CITY OF LONDON", + "type": "GEO" + }, + { + "title": "CORNHILL", + "type": "GEO" + }, + { + "title": "CAMDEN TOWN", + "type": "GEO" + }, + { + "title": "CORPORATION, ALDERMEN, AND LIVERY", + "type": "ORGANIZATION" + }, + { + "title": "SCROOGE'S CHAMBERS", + "type": "GEO" + }, + { + "title": "YARD (SCROOGE'S HOUSE)", + "type": "GEO" + }, + { + "title": "TAVERN", + "type": "GEO" + }, + { + "title": "WINE-MERCHANT'S CELLARS", + "type": "GEO" + }, + { + "title": "BANKER'S BOOK", + 
"type": "ORGANIZATION" + }, + { + "title": "WINE-MERCHANT'S CELLAR", + "type": "GEO" + }, + { + "title": "DUTCH MERCHANT", + "type": "PERSON" + }, + { + "title": "SCROOGE'S HOUSE", + "type": "GEO" + }, + { + "title": "SCRIPTURES", + "type": "EVENT" + }, + { + "title": "ABEL", + "type": "PERSON" + }, + { + "title": "PHARAOH'S DAUGHTERS", + "type": "PERSON" + }, + { + "title": "QUEENS OF SHEBA", + "type": "PERSON" + }, + { + "title": "ANGELIC MESSENGERS", + "type": "PERSON" + }, + { + "title": "ABRAHAM", + "type": "PERSON" + }, + { + "title": "BELSHAZZAR", + "type": "PERSON" + }, + { + "title": "APOSTLES", + "type": "PERSON" + }, + { + "title": "STREET", + "type": "GEO" + }, + { + "title": "ACT OF PARLIAMENT", + "type": "EVENT" + }, + { + "title": "HEARSE", + "type": "EVENT" + }, + { + "title": "BEDROOM", + "type": "GEO" + }, + { + "title": "SITTING-ROOM", + "type": "GEO" + }, + { + "title": "LUMBER-ROOM", + "type": "GEO" + }, + { + "title": "CLOSET", + "type": "GEO" + }, + { + "title": "CHAMBER", + "type": "GEO" + }, + { + "title": "BUILDING", + "type": "GEO" + }, + { + "title": "FIREPLACE", + "type": "GEO" + }, + { + "title": "CHAIR", + "type": "GEO" + }, + { + "title": "CHAIN", + "type": "EVENT" + }, + { + "title": "BANDAGE", + "type": "EVENT" + }, + { + "title": "SPECTRE'S CRY", + "type": "EVENT" + }, + { + "title": "THE GHOST OF JACOB MARLEY", + "type": "PERSON" + }, + { + "title": "THE COUNTING-HOUSE", + "type": "ORGANIZATION" + }, + { + "title": "THE WARD", + "type": "GEO" + }, + { + "title": "THE STAR", + "type": "EVENT" + }, + { + "title": "FELLOW-MEN", + "type": "PERSON" + }, + { + "title": "MANKIND", + "type": "ORGANIZATION" + }, + { + "title": "CHRISTIAN SPIRIT", + "type": "PERSON" + }, + { + "title": "POOR HOMES", + "type": "GEO" + }, + { + "title": "EARTH", + "type": "GEO" + }, + { + "title": "ETERNITY", + "type": "EVENT" + }, + { + "title": "THREE SPIRITS", + "type": "PERSON" + }, + { + "title": "PHANTOMS", + "type": "PERSON" + }, + { + "title": 
"INVISIBLE WORLD", + "type": "EVENT" + }, + { + "title": "INFANT", + "type": "PERSON" + }, + { + "title": "OLD GHOST IN WHITE WAISTCOAT", + "type": "PERSON" + }, + { + "title": "GUILTY GOVERNMENTS", + "type": "ORGANIZATION" + }, + { + "title": "SCROOGE'S CHAMBER", + "type": "GEO" + }, + { + "title": "WINDOW", + "type": "GEO" + }, + { + "title": "DOOR", + "type": "GEO" + }, + { + "title": "NIGHT", + "type": "EVENT" + }, + { + "title": "BELL", + "type": "EVENT" + }, + { + "title": "CLOCK", + "type": "EVENT" + }, + { + "title": "THE FIRST OF EXCHANGE", + "type": "EVENT" + }, + { + "title": "UNITED STATES SECURITY", + "type": "EVENT" + }, + { + "title": "THE GHOST OF CHRISTMAS PAST", + "type": "PERSON" + }, + { + "title": "SCROOGE'S BEDROOM", + "type": "GEO" + }, + { + "title": "THE WORLD", + "type": "GEO" + }, + { + "title": "THE CLOCK", + "type": "EVENT" + }, + { + "title": "THE BELL", + "type": "EVENT" + }, + { + "title": "THE CURTAINS", + "type": "EVENT" + }, + { + "title": "MARKET-TOWN", + "type": "GEO" + }, + { + "title": "SCHOOL", + "type": "GEO" + }, + { + "title": "FARMERS", + "type": "PERSON" + }, + { + "title": "FRIENDS", + "type": "PERSON" + }, + { + "title": "ROAD", + "type": "GEO" + }, + { + "title": "LANE", + "type": "GEO" + }, + { + "title": "FIELDS", + "type": "GEO" + }, + { + "title": "BRIDGE", + "type": "GEO" + }, + { + "title": "RIVER", + "type": "GEO" + }, + { + "title": "GHOST", + "type": "PERSON" + }, + { + "title": "ALI BABA", + "type": "PERSON" + }, + { + "title": "VALENTINE", + "type": "PERSON" + }, + { + "title": "ORSON", + "type": "PERSON" + }, + { + "title": "SULTAN'S GROOM", + "type": "PERSON" + }, + { + "title": "GENII", + "type": "PERSON" + }, + { + "title": "PRINCESS", + "type": "PERSON" + }, + { + "title": "ROBIN CRUSOE", + "type": "PERSON" + }, + { + "title": "PARROT", + "type": "PERSON" + }, + { + "title": "FRIDAY", + "type": "PERSON" + }, + { + "title": "DAMASCUS", + "type": "GEO" + }, + { + "title": "MANSION", + "type": "GEO" + }, 
+ { + "title": "JOLLY HOLIDAYS", + "type": "EVENT" + }, + { + "title": "BOY SINGING AT SCROOGE'S DOOR", + "type": "PERSON" + }, + { + "title": "SPIRIT", + "type": "PERSON" + }, + { + "title": "OFFICES", + "type": "ORGANIZATION" + }, + { + "title": "STABLES", + "type": "ORGANIZATION" + }, + { + "title": "COACH-HOUSES", + "type": "ORGANIZATION" + }, + { + "title": "SHEDS", + "type": "ORGANIZATION" + }, + { + "title": "STOREHOUSE", + "type": "ORGANIZATION" + }, + { + "title": "HIGH-ROAD", + "type": "GEO" + }, + { + "title": "YARD", + "type": "GEO" + }, + { + "title": "HALL", + "type": "GEO" + }, + { + "title": "ROOM", + "type": "GEO" + }, + { + "title": "ISLAND", + "type": "GEO" + }, + { + "title": "CREEK", + "type": "GEO" + }, + { + "title": "SCHOOLMASTER", + "type": "PERSON" + }, + { + "title": "WAREHOUSE", + "type": "ORGANIZATION" + }, + { + "title": "SERVANT", + "type": "PERSON" + }, + { + "title": "FATHER", + "type": "PERSON" + }, + { + "title": "GARDEN SWEEP", + "type": "GEO" + }, + { + "title": "CHAIR/COACH", + "type": "ORGANIZATION" + }, + { + "title": "PARLOUR", + "type": "GEO" + }, + { + "title": "MISS FEZZIWIGS", + "type": "PERSON" + }, + { + "title": "FEZZIWIG'S WAREHOUSE", + "type": "ORGANIZATION" + }, + { + "title": "CHRISTMAS EVE PARTY AT FEZZIWIG'S", + "type": "EVENT" + }, + { + "title": "THE FIDDLER", + "type": "PERSON" + }, + { + "title": "THE HOUSEMAID", + "type": "PERSON" + }, + { + "title": "THE BAKER", + "type": "PERSON" + }, + { + "title": "THE COOK", + "type": "PERSON" + }, + { + "title": "THE MILKMAN", + "type": "PERSON" + }, + { + "title": "THE BOY FROM OVER THE WAY", + "type": "PERSON" + }, + { + "title": "THE GIRL FROM NEXT DOOR BUT ONE", + "type": "PERSON" + }, + { + "title": "YOUNG MEN AND WOMEN EMPLOYED IN THE BUSINESS", + "type": "PERSON" + }, + { + "title": "THE THREE MISS FEZZIWIGS' SIX YOUNG FOLLOWERS", + "type": "PERSON" + }, + { + "title": "DICK", + "type": "PERSON" + }, + { + "title": "THE TWO APPRENTICES", + "type": "PERSON" + }, 
+ { + "title": "THE FAIR YOUNG GIRL", + "type": "PERSON" + }, + { + "title": "THE DOMESTIC BALL", + "type": "EVENT" + }, + { + "title": "BACK-SHOP", + "type": "GEO" + }, + { + "title": "YOUNG SCROOGE", + "type": "PERSON" + }, + { + "title": "OLDER SCROOGE", + "type": "PERSON" + }, + { + "title": "THE SHOP", + "type": "GEO" + }, + { + "title": "THE DOOR", + "type": "GEO" + }, + { + "title": "THE GIRL", + "type": "PERSON" + }, + { + "title": "THE GHOST", + "type": "PERSON" + }, + { + "title": "THE MATRON", + "type": "PERSON" + }, + { + "title": "THE DAUGHTER", + "type": "PERSON" + }, + { + "title": "THE FATHER", + "type": "PERSON" + }, + { + "title": "HOME", + "type": "GEO" + }, + { + "title": "PORTER", + "type": "PERSON" + }, + { + "title": "WINTER FIRE", + "type": "EVENT" + }, + { + "title": "SCROOGE'S OFFICE", + "type": "GEO" + }, + { + "title": "CHRISTMAS TOYS AND PRESENTS", + "type": "EVENT" + }, + { + "title": "BELLE'S DAUGHTER", + "type": "PERSON" + }, + { + "title": "BELLE'S FAMILY", + "type": "ORGANIZATION" + }, + { + "title": "THE PORTER", + "type": "PERSON" + }, + { + "title": "THE CHILDREN", + "type": "PERSON" + }, + { + "title": "THE BABY", + "type": "PERSON" + }, + { + "title": "THE HOUSE", + "type": "GEO" + }, + { + "title": "THE PARLOUR", + "type": "GEO" + }, + { + "title": "THE TOP OF THE HOUSE", + "type": "GEO" + }, + { + "title": "THE FIRESIDE", + "type": "GEO" + }, + { + "title": "THE EXTINQUISHER-CAP", + "type": "EVENT" + }, + { + "title": "SCROOGE'S ROOM", + "type": "GEO" + }, + { + "title": "MISTLETOE", + "type": "GEO" + }, + { + "title": "IVY", + "type": "GEO" + }, + { + "title": "PLENTY'S HORN", + "type": "EVENT" + }, + { + "title": "CHIMNEY", + "type": "GEO" + }, + { + "title": "BED", + "type": "GEO" + }, + { + "title": "LOCK", + "type": "GEO" + }, + { + "title": "HEARTH", + "type": "GEO" + }, + { + "title": "THRONE", + "type": "GEO" + }, + { + "title": "ANTIQUE SCABBARD", + "type": "GEO" + }, + { + "title": "SPIRIT'S FAMILY", + "type": 
"PERSON" + }, + { + "title": "YOUNGER MEMBERS OF SPIRIT'S FAMILY", + "type": "PERSON" + }, + { + "title": "ELDER BROTHERS OF SPIRIT'S FAMILY", + "type": "PERSON" + }, + { + "title": "CHRISTMAS MORNING", + "type": "EVENT" + }, + { + "title": "FRUITERERS", + "type": "ORGANIZATION" + }, + { + "title": "HOUSE-TOPS", + "type": "GEO" + }, + { + "title": "POULTERERS' SHOPS", + "type": "ORGANIZATION" + }, + { + "title": "FRUITERERS' SHOPS", + "type": "ORGANIZATION" + }, + { + "title": "SHOPKEEPERS", + "type": "PERSON" + }, + { + "title": "GROCER", + "type": "PERSON" + }, + { + "title": "CUSTOMERS", + "type": "PERSON" + }, + { + "title": "BOYS", + "type": "PERSON" + }, + { + "title": "CRATCHIT FAMILY", + "type": "ORGANIZATION" + }, + { + "title": "BAKERS' SHOPS", + "type": "ORGANIZATION" + }, + { + "title": "SUBURBS OF THE TOWN", + "type": "GEO" + }, + { + "title": "PARKS", + "type": "GEO" + }, + { + "title": "CHRISTMAS DAY", + "type": "EVENT" + }, + { + "title": "SEVENTH DAY", + "type": "EVENT" + }, + { + "title": "DINNER-CARRIERS", + "type": "PERSON" + }, + { + "title": "GROCER'S PEOPLE", + "type": "PERSON" + }, + { + "title": "CHAPEL", + "type": "ORGANIZATION" + }, + { + "title": "TOWN", + "type": "GEO" + }, + { + "title": "BOY CRATCHIT", + "type": "PERSON" + }, + { + "title": "GIRL CRATCHIT", + "type": "PERSON" + }, + { + "title": "TINY TIM", + "type": "PERSON" + }, + { + "title": "YOUNG CRATCHITS", + "type": "PERSON" + }, + { + "title": "CHRISTMAS DINNER", + "type": "EVENT" + }, + { + "title": "BAKER'S SHOP", + "type": "GEO" + }, + { + "title": "GOOSE", + "type": "EVENT" + }, + { + "title": "BACK-YARD", + "type": "GEO" + }, + { + "title": "POOR CHIMNEY CORNER", + "type": "GEO" + }, + { + "title": "FUTURE", + "type": "EVENT" + }, + { + "title": "FOUNDER OF THE FEAST", + "type": "PERSON" + }, + { + "title": "COPPER", + "type": "GEO" + }, + { + "title": "EATING-HOUSE", + "type": "GEO" + }, + { + "title": "PASTRY-COOK'S", + "type": "GEO" + }, + { + "title": "LAUNDRESS'S", 
+ "type": "GEO" + }, + { + "title": "CHRISTMAS HOLLY", + "type": "GEO" + }, + { + "title": "JUG", + "type": "GEO" + }, + { + "title": "GLASS", + "type": "GEO" + }, + { + "title": "CUSTARD CUP", + "type": "GEO" + }, + { + "title": "CHESTNUTS", + "type": "GEO" + }, + { + "title": "APPLE SAUCE", + "type": "GEO" + }, + { + "title": "MASHED POTATOES", + "type": "GEO" + }, + { + "title": "CHRISTMAS PUDDING", + "type": "EVENT" + }, + { + "title": "CHRISTMAS GOOSE", + "type": "EVENT" + }, + { + "title": "SURPLUS POPULATION", + "type": "EVENT" + }, + { + "title": "THE SPIRIT", + "type": "PERSON" + }, + { + "title": "THE CRATCHIT FAMILY", + "type": "ORGANIZATION" + }, + { + "title": "THE FEAST", + "type": "EVENT" + }, + { + "title": "THE MILLINER'S", + "type": "ORGANIZATION" + }, + { + "title": "THE LAMPLIGHTER", + "type": "PERSON" + }, + { + "title": "THE STREET", + "type": "GEO" + }, + { + "title": "LORD", + "type": "PERSON" + }, + { + "title": "MASTER PETER", + "type": "PERSON" + }, + { + "title": "GUESTS", + "type": "PERSON" + }, + { + "title": "HANDSOME GIRLS", + "type": "PERSON" + }, + { + "title": "NEIGHBOUR", + "type": "PERSON" + }, + { + "title": "SINGLE MAN", + "type": "PERSON" + }, + { + "title": "SPIRIT'S TORCH", + "type": "EVENT" + }, + { + "title": "SONG ABOUT A LOST CHILD", + "type": "EVENT" + }, + { + "title": "PAWNBROKER'S", + "type": "ORGANIZATION" + }, + { + "title": "KITCHENS", + "type": "GEO" + }, + { + "title": "PARLOURS", + "type": "GEO" + }, + { + "title": "ROOMS", + "type": "GEO" + }, + { + "title": "HOUSE", + "type": "GEO" + }, + { + "title": "WINDOW-BLINDS", + "type": "GEO" + }, + { + "title": "SNOW", + "type": "GEO" + }, + { + "title": "EVENING", + "type": "EVENT" + }, + { + "title": "FIRE", + "type": "EVENT" + }, + { + "title": "MINERS", + "type": "PERSON" + }, + { + "title": "OLD MAN MINER", + "type": "PERSON" + }, + { + "title": "LIGHTHOUSE KEEPERS", + "type": "PERSON" + }, + { + "title": "ELDER LIGHTHOUSE KEEPER", + "type": "PERSON" + }, + { + 
"title": "SHIP CREW", + "type": "PERSON" + }, + { + "title": "MOOR", + "type": "GEO" + }, + { + "title": "LIGHTHOUSE", + "type": "GEO" + }, + { + "title": "SHIP", + "type": "GEO" + }, + { + "title": "OLD WOMAN MINER", + "type": "PERSON" + }, + { + "title": "OFFICERS", + "type": "PERSON" + }, + { + "title": "HELMSMAN", + "type": "PERSON" + }, + { + "title": "LOOK-OUT", + "type": "PERSON" + }, + { + "title": "BURIAL-PLACE OF GIANTS", + "type": "GEO" + }, + { + "title": "WEST", + "type": "GEO" + }, + { + "title": "REEF OF SUNKEN ROCKS", + "type": "GEO" + }, + { + "title": "SEA", + "type": "GEO" + }, + { + "title": "CHRISTMAS SONG", + "type": "EVENT" + }, + { + "title": "SCROOGE'S NIECE", + "type": "PERSON" + }, + { + "title": "SCROOGE'S NIECE'S SISTERS", + "type": "PERSON" + }, + { + "title": "TOPPER", + "type": "PERSON" + }, + { + "title": "THE LADIES", + "type": "PERSON" + }, + { + "title": "THE PLUMP SISTER", + "type": "PERSON" + }, + { + "title": "THE SISTER WITH ROSES", + "type": "PERSON" + }, + { + "title": "THE HOUSEKEEPERS", + "type": "PERSON" + }, + { + "title": "THE DINNER", + "type": "EVENT" + }, + { + "title": "THE DESSERT", + "type": "EVENT" + }, + { + "title": "THE MUSIC", + "type": "EVENT" + }, + { + "title": "THE TEA", + "type": "EVENT" + }, + { + "title": "PLUMP SISTER", + "type": "PERSON" + }, + { + "title": "SEXT\u00d3N", + "type": "PERSON" + }, + { + "title": "WHITECHAPEL", + "type": "GEO" + }, + { + "title": "BOARDING-SCHOOL", + "type": "ORGANIZATION" + }, + { + "title": "PIANO", + "type": "ORGANIZATION" + }, + { + "title": "FIRE-IRONS", + "type": "ORGANIZATION" + }, + { + "title": "CURTAINS", + "type": "ORGANIZATION" + }, + { + "title": "CHAIRS", + "type": "ORGANIZATION" + }, + { + "title": "RING", + "type": "ORGANIZATION" + }, + { + "title": "GAME OF YES AND NO", + "type": "EVENT" + }, + { + "title": "GAME OF BLIND MAN'S-BUFF", + "type": "EVENT" + }, + { + "title": "GAME OF FORFEITS", + "type": "EVENT" + }, + { + "title": "GAME OF HOW, WHEN, AND 
WHERE", + "type": "EVENT" + }, + { + "title": "BOY (IGNORANCE)", + "type": "PERSON" + }, + { + "title": "GIRL", + "type": "PERSON" + }, + { + "title": "ALMSHOUSE", + "type": "ORGANIZATION" + }, + { + "title": "HOSPITAL", + "type": "ORGANIZATION" + }, + { + "title": "GAOL", + "type": "ORGANIZATION" + }, + { + "title": "CHRISTMAS HOLIDAYS", + "type": "EVENT" + }, + { + "title": "TWELFTH-NIGHT PARTY", + "type": "EVENT" + }, + { + "title": "COMPANY AT FRED'S PARTY", + "type": "ORGANIZATION" + }, + { + "title": "FOREIGN LANDS", + "type": "GEO" + }, + { + "title": "SICK-BEDS", + "type": "GEO" + }, + { + "title": "MIDNIGHT", + "type": "EVENT" + }, + { + "title": "THREE-QUARTERS PAST ELEVEN", + "type": "EVENT" + }, + { + "title": "CHIMES", + "type": "EVENT" + }, + { + "title": "POVERTY", + "type": "EVENT" + }, + { + "title": "MISERY'S REFUGE", + "type": "GEO" + }, + { + "title": "AUTHORITY", + "type": "PERSON" + }, + { + "title": "MAN", + "type": "PERSON" + }, + { + "title": "IGNORANCE", + "type": "PERSON" + }, + { + "title": "WANT", + "type": "PERSON" + }, + { + "title": "WORKHOUSES", + "type": "ORGANIZATION" + }, + { + "title": "CHANGE", + "type": "ORGANIZATION" + }, + { + "title": "MERCHANTS", + "type": "PERSON" + }, + { + "title": "GREAT FAT MAN", + "type": "PERSON" + }, + { + "title": "RED-FACED GENTLEMAN", + "type": "PERSON" + }, + { + "title": "MAN WITH LARGE CHIN", + "type": "PERSON" + }, + { + "title": "CITY HEART", + "type": "GEO" + }, + { + "title": "DEATH OF UNNAMED MAN", + "type": "EVENT" + }, + { + "title": "JACOB", + "type": "PERSON" + }, + { + "title": "PHANTOM", + "type": "PERSON" + }, + { + "title": "BUSINESSMEN", + "type": "PERSON" + }, + { + "title": "BUSINESS COMPANY", + "type": "ORGANIZATION" + }, + { + "title": "DEN OF INFAMOUS RESORT", + "type": "GEO" + }, + { + "title": "RAG-AND-BONE SHOP", + "type": "ORGANIZATION" + }, + { + "title": "GREY-HAIRED RASCAL", + "type": "PERSON" + }, + { + "title": "BUSINESS DISTRICT", + "type": "GEO" + }, + { + 
"title": "OBSCURE PART OF TOWN", + "type": "GEO" + }, + { + "title": "ALLEYS AND ARCHWAYS", + "type": "GEO" + }, + { + "title": "CHRISTMAS-TIME", + "type": "EVENT" + }, + { + "title": "DEATH OF OLD SCRATCH", + "type": "EVENT" + }, + { + "title": "CHANGE OF LIFE", + "type": "EVENT" + }, + { + "title": "GROUP OF SPEAKERS AND LISTENERS", + "type": "ORGANIZATION" + }, + { + "title": "OLD JOE", + "type": "PERSON" + }, + { + "title": "CHARWOMAN", + "type": "PERSON" + }, + { + "title": "UNDERTAKER'S MAN", + "type": "PERSON" + }, + { + "title": "SHOP", + "type": "GEO" + }, + { + "title": "DEAD MAN", + "type": "PERSON" + }, + { + "title": "THE DEAD MAN", + "type": "PERSON" + }, + { + "title": "THE PHANTOM", + "type": "PERSON" + }, + { + "title": "THE ROOM", + "type": "GEO" + }, + { + "title": "DEATH", + "type": "EVENT" + }, + { + "title": "THE OLD MAN", + "type": "PERSON" + }, + { + "title": "THE BED", + "type": "GEO" + }, + { + "title": "THE SHEET", + "type": "GEO" + }, + { + "title": "THE BLANKETS", + "type": "GEO" + }, + { + "title": "THE SHIRT", + "type": "GEO" + }, + { + "title": "THE BUNDLE", + "type": "GEO" + }, + { + "title": "THE FLANNEL BAG", + "type": "GEO" + }, + { + "title": "THE LAMP", + "type": "GEO" + }, + { + "title": "CAROLINE'S HUSBAND", + "type": "PERSON" + }, + { + "title": "CRATCHIT'S WIFE", + "type": "PERSON" + }, + { + "title": "THE CREDITOR", + "type": "PERSON" + }, + { + "title": "THE TOWN", + "type": "GEO" + }, + { + "title": "BOB CRATCHIT'S HOUSE", + "type": "GEO" + }, + { + "title": "THE EVENT OF THE CREDITOR'S DEATH", + "type": "EVENT" + }, + { + "title": "THE EVENT OF TINY TIM'S DEATH", + "type": "EVENT" + }, + { + "title": "THE MOTHER IN THE FIRST SCENE", + "type": "PERSON" + }, + { + "title": "THE CHILDREN IN THE FIRST SCENE", + "type": "PERSON" + }, + { + "title": "THE HALF-DRUNKEN WOMAN", + "type": "PERSON" + }, + { + "title": "THE DARK CHAMBER", + "type": "GEO" + }, + { + "title": "THE ROOM BY DAYLIGHT", + "type": "GEO" + }, + { + 
"title": "SEVERAL STREETS", + "type": "GEO" + }, + { + "title": "THE DINNER BY THE FIRE", + "type": "EVENT" + }, + { + "title": "THE LONG-EXPECTED KNOCK", + "type": "EVENT" + }, + { + "title": "THE SCENE OF THE DEAD MAN", + "type": "EVENT" + }, + { + "title": "MR. SCROOGE'S NEPHEW", + "type": "PERSON" + }, + { + "title": "MR. SCROOGE", + "type": "PERSON" + }, + { + "title": "SUNDAY", + "type": "EVENT" + }, + { + "title": "CRATCHIT CHILDREN", + "type": "PERSON" + }, + { + "title": "ROBERT CRATCHIT", + "type": "PERSON" + }, + { + "title": "THE ROOM ABOVE", + "type": "GEO" + }, + { + "title": "THE FIRE", + "type": "GEO" + }, + { + "title": "THE TABLE", + "type": "GEO" + }, + { + "title": "CHURCHYARD", + "type": "GEO" + }, + { + "title": "THE GRAVE", + "type": "GEO" + }, + { + "title": "THE BEDPOST", + "type": "GEO" + }, + { + "title": "THE PAST", + "type": "EVENT" + }, + { + "title": "THE PRESENT", + "type": "EVENT" + }, + { + "title": "THE FUTURE", + "type": "EVENT" + }, + { + "title": "SPIRITS", + "type": "PERSON" + }, + { + "title": "THE BOY", + "type": "PERSON" + }, + { + "title": "POULTERER", + "type": "ORGANIZATION" + }, + { + "title": "CHURCHES", + "type": "ORGANIZATION" + }, + { + "title": "NEXT STREET", + "type": "GEO" + }, + { + "title": "JOE MILLER", + "type": "PERSON" + }, + { + "title": "PRIZE TURKEY", + "type": "EVENT" + }, + { + "title": "STREET-DOOR", + "type": "GEO" + }, + { + "title": "CORNER", + "type": "GEO" + }, + { + "title": "HEAVEN", + "type": "GEO" + }, + { + "title": "POULTERER'S MAN", + "type": "PERSON" + }, + { + "title": "PORTLY GENTLEMAN", + "type": "PERSON" + }, + { + "title": "SCROOGE'S NEPHEW (FRED)", + "type": "PERSON" + }, + { + "title": "FRED'S HOUSE", + "type": "GEO" + }, + { + "title": "SCROOGE'S NIECE BY MARRIAGE", + "type": "PERSON" + }, + { + "title": "CAB", + "type": "ORGANIZATION" + }, + { + "title": "DINING-ROOM", + "type": "GEO" + }, + { + "title": "KITCHENS OF HOUSES", + "type": "GEO" + }, + { + "title": "WINDOWS", + 
"type": "GEO" + }, + { + "title": "BEGGARS", + "type": "PERSON" + }, + { + "title": "CHILDREN", + "type": "PERSON" + }, + { + "title": "HOUSEKEEPERS", + "type": "PERSON" + }, + { + "title": "MISTRESS", + "type": "PERSON" + }, + { + "title": "SCROOGE'S HAND", + "type": "PERSON" + }, + { + "title": "CHRISTMAS DINNER AT FRED'S", + "type": "EVENT" + }, + { + "title": "SCROOGE'S TRANSFORMATION", + "type": "EVENT" + }, + { + "title": "BOB CRATCHIT'S FAMILY", + "type": "PERSON" + }, + { + "title": "GOOD OLD WORLD", + "type": "GEO" + }, + { + "title": "BOROUGH", + "type": "GEO" + }, + { + "title": "FOUNDATION", + "type": "ORGANIZATION" + }, + { + "title": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "type": "ORGANIZATION" + }, + { + "title": "TRANSCRIBER", + "type": "PERSON" + }, + { + "title": "WWW.GUTENBERG.ORG", + "type": "ORGANIZATION" + }, + { + "title": "GENERAL TERMS OF USE", + "type": "EVENT" + }, + { + "title": "FULL PROJECT GUTENBERG LICENSE", + "type": "EVENT" + }, + { + "title": "PROJECT GUTENBERG TRADEMARK LICENSE", + "type": "EVENT" + }, + { + "title": "PROJECT GUTENBERG EBOOK", + "type": "EVENT" + }, + { + "title": "COPYRIGHT HOLDER", + "type": "PERSON" + }, + { + "title": "PROJECT GUTENBERG VOLUNTEERS AND EMPLOYEES", + "type": "PERSON" + }, + { + "title": "PROJECT GUTENBERG\u2122 TRADEMARK", + "type": "ORGANIZATION" + }, + { + "title": "SECTION 4", + "type": "EVENT" + }, + { + "title": "PARAGRAPH 1.E.1", + "type": "EVENT" + }, + { + "title": "PARAGRAPH 1.E.7", + "type": "EVENT" + }, + { + "title": "PARAGRAPH 1.E.8", + "type": "EVENT" + }, + { + "title": "PARAGRAPH 1.E.9", + "type": "EVENT" + }, + { + "title": "PARAGRAPH 1.F.3", + "type": "EVENT" + }, + { + "title": "PLAIN VANILLA ASCII", + "type": "EVENT" + }, + { + "title": "COUNTRY", + "type": "GEO" + }, + { + "title": "INTERNAL REVENUE SERVICE", + "type": "ORGANIZATION" + }, + { + "title": "MISSISSIPPI", + "type": "GEO" + }, + { + "title": "SALT LAKE CITY", + "type": "GEO" + }, + { + "title": 
"VOLUNTEERS", + "type": "PERSON" + }, + { + "title": "DISTRIBUTOR", + "type": "PERSON" + }, + { + "title": "TRADEMARK OWNER", + "type": "PERSON" + }, + { + "title": "STATE", + "type": "GEO" + }, + { + "title": "AGENT", + "type": "PERSON" + }, + { + "title": "EMPLOYEE", + "type": "PERSON" + }, + { + "title": "EIN", + "type": "ORGANIZATION" + }, + { + "title": "BUSINESS OFFICE", + "type": "GEO" + }, + { + "title": "STATE OF MISSISSIPPI", + "type": "GEO" + }, + { + "title": "SALT LAKE CITY, UT", + "type": "GEO" + }, + { + "title": "PROFESSOR MICHAEL S. HART", + "type": "PERSON" + }, + { + "title": "CHARITIES", + "type": "ORGANIZATION" + }, + { + "title": "DONORS", + "type": "PERSON" + }, + { + "title": "VOLUNTEER SUPPORT", + "type": "ORGANIZATION" + }, + { + "title": "PG SEARCH FACILITY", + "type": "ORGANIZATION" + }, + { + "title": "EMAIL NEWSLETTER", + "type": "ORGANIZATION" + }, + { + "title": "WWW.GUTENBERG.ORG/CONTACT", + "type": "ORGANIZATION" + }, + { + "title": "WWW.GUTENBERG.ORG/DONATE", + "type": "ORGANIZATION" + } + ], + "edges": [ + { + "source": "CHARLES DICKENS", + "target": "A CHRISTMAS CAROL", + "weight": 20.0 + }, + { + "source": "ARTHUR RACKHAM", + "target": "A CHRISTMAS CAROL", + "weight": 8.0 + }, + { + "source": "J. B. LIPPINCOTT COMPANY", + "target": "A CHRISTMAS CAROL", + "weight": 8.0 + }, + { + "source": "J. B. LIPPINCOTT COMPANY", + "target": "PHILADELPHIA", + "weight": 7.0 + }, + { + "source": "J. B. LIPPINCOTT COMPANY", + "target": "NEW YORK", + "weight": 7.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "A CHRISTMAS CAROL", + "weight": 24.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "UNITED STATES", + "weight": 22.0 + }, + { + "source": "BOB CRATCHIT", + "target": "EBENEZER SCROOGE", + "weight": 9.0 + }, + { + "source": "PETER CRATCHIT", + "target": "BOB CRATCHIT", + "weight": 9.0 + }, + { + "source": "TIM CRATCHIT", + "target": "BOB CRATCHIT", + "weight": 9.0 + }, + { + "source": "MRS. 
CRATCHIT", + "target": "BOB CRATCHIT", + "weight": 9.0 + }, + { + "source": "BELINDA CRATCHIT", + "target": "MRS. CRATCHIT", + "weight": 9.0 + }, + { + "source": "MARTHA CRATCHIT", + "target": "MRS. CRATCHIT", + "weight": 9.0 + }, + { + "source": "BELINDA CRATCHIT", + "target": "BOB CRATCHIT", + "weight": 9.0 + }, + { + "source": "MARTHA CRATCHIT", + "target": "BOB CRATCHIT", + "weight": 9.0 + }, + { + "source": "MR. FEZZIWIG", + "target": "MRS. FEZZIWIG", + "weight": 9.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "SCROOGE AND MARLEY", + "weight": 19.0 + }, + { + "source": "JACOB MARLEY", + "target": "SCROOGE AND MARLEY", + "weight": 19.0 + }, + { + "source": "GHOST OF JACOB MARLEY", + "target": "EBENEZER SCROOGE", + "weight": 8.0 + }, + { + "source": "FRED", + "target": "EBENEZER SCROOGE", + "weight": 8.0 + }, + { + "source": "FAN", + "target": "EBENEZER SCROOGE", + "weight": 8.0 + }, + { + "source": "DICK WILKINS", + "target": "EBENEZER SCROOGE", + "weight": 7.0 + }, + { + "source": "BELLE", + "target": "EBENEZER SCROOGE", + "weight": 8.0 + }, + { + "source": "CAROLINE", + "target": "EBENEZER SCROOGE", + "weight": 6.0 + }, + { + "source": "GHOST OF CHRISTMAS PAST", + "target": "EBENEZER SCROOGE", + "weight": 8.0 + }, + { + "source": "GHOST OF CHRISTMAS PRESENT", + "target": "EBENEZER SCROOGE", + "weight": 8.0 + }, + { + "source": "GHOST OF CHRISTMAS YET TO COME", + "target": "EBENEZER SCROOGE", + "weight": 8.0 + }, + { + "source": "MRS. DILBER", + "target": "EBENEZER SCROOGE", + "weight": 6.0 + }, + { + "source": "MR. 
FEZZIWIG", + "target": "EBENEZER SCROOGE", + "weight": 1.0 + }, + { + "source": "SUZANNE SHELL", + "target": "PROJECT GUTENBERG", + "weight": 7.0 + }, + { + "source": "JANET BLENKINSHIP", + "target": "PROJECT GUTENBERG", + "weight": 7.0 + }, + { + "source": "ONLINE DISTRIBUTED PROOFREADING TEAM", + "target": "PROJECT GUTENBERG", + "weight": 7.0 + }, + { + "source": "MARLEY'S GHOST", + "target": "A CHRISTMAS CAROL", + "weight": 9.0 + }, + { + "source": "THE FIRST OF THE THREE SPIRITS", + "target": "A CHRISTMAS CAROL", + "weight": 9.0 + }, + { + "source": "THE SECOND OF THE THREE SPIRITS", + "target": "A CHRISTMAS CAROL", + "weight": 9.0 + }, + { + "source": "THE LAST OF THE SPIRITS", + "target": "A CHRISTMAS CAROL", + "weight": 9.0 + }, + { + "source": "THE END OF IT", + "target": "A CHRISTMAS CAROL", + "weight": 9.0 + }, + { + "source": "PHILADELPHIA", + "target": "GREAT BRITAIN", + "weight": 6.0 + }, + { + "source": "NEW YORK", + "target": "GREAT BRITAIN", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "MARLEY", + "weight": 32.0 + }, + { + "source": "SCROOGE", + "target": "SCROOGE AND MARLEY", + "weight": 18.0 + }, + { + "source": "MARLEY", + "target": "SCROOGE AND MARLEY", + "weight": 9.0 + }, + { + "source": "MARLEY'S FUNERAL", + "target": "MARLEY", + "weight": 10.0 + }, + { + "source": "MARLEY'S FUNERAL", + "target": "SCROOGE", + "weight": 8.0 + }, + { + "source": "ST. PAUL'S CHURCHYARD", + "target": "MARLEY", + "weight": 1.0 + }, + { + "source": "FEZZIWIG", + "target": "MRS. 
FEZZIWIG", + "weight": 27.0 + }, + { + "source": "FEZZIWIG", + "target": "SCROOGE", + "weight": 15.0 + }, + { + "source": "FRED", + "target": "SCROOGE", + "weight": 7.0 + }, + { + "source": "BOB CRATCHIT", + "target": "SCROOGE", + "weight": 8.0 + }, + { + "source": "TIM", + "target": "BOB CRATCHIT", + "weight": 9.0 + }, + { + "source": "TIM", + "target": "SCROOGE", + "weight": 8.0 + }, + { + "source": "JOE", + "target": "THE WOMAN", + "weight": 15.0 + }, + { + "source": "JOE", + "target": "SCROOGE", + "weight": 6.0 + }, + { + "source": "THE WOMAN", + "target": "SCROOGE", + "weight": 6.0 + }, + { + "source": "THE CLERGYMAN", + "target": "MARLEY'S FUNERAL", + "weight": 7.0 + }, + { + "source": "THE CLERK", + "target": "MARLEY'S FUNERAL", + "weight": 7.0 + }, + { + "source": "THE UNDERTAKER", + "target": "MARLEY'S FUNERAL", + "weight": 7.0 + }, + { + "source": "THE CHIEF MOURNER", + "target": "MARLEY'S FUNERAL", + "weight": 7.0 + }, + { + "source": "OLD SCRATCH", + "target": "SCROOGE", + "weight": 5.0 + }, + { + "source": "LONDON", + "target": "SCROOGE", + "weight": 15.0 + }, + { + "source": "LONDON", + "target": "SCROOGE AND MARLEY", + "weight": 8.0 + }, + { + "source": "CHRISTMAS", + "target": "SCROOGE", + "weight": 17.0 + }, + { + "source": "CHRISTMAS", + "target": "FRED", + "weight": 8.0 + }, + { + "source": "CHRISTMAS", + "target": "FEZZIWIG", + "weight": 8.0 + }, + { + "source": "SCROOGE'S FUNERAL", + "target": "SCROOGE", + "weight": 1.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "SCROOGE'S NEPHEW", + "weight": 15.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "SCROOGE'S CLERK", + "weight": 8.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "COUNTING-HOUSE", + "weight": 9.0 + }, + { + "source": "COUNTING-HOUSE", + "target": "SCROOGE'S CLERK", + "weight": 7.0 + }, + { + "source": "COUNTING-HOUSE", + "target": "THE CITY", + "weight": 5.0 + }, + { + "source": "CHRISTMAS EVE", + "target": "EBENEZER SCROOGE", + "weight": 7.0 + }, + { + 
"source": "CHRISTMAS EVE", + "target": "SCROOGE'S NEPHEW", + "weight": 1.0 + }, + { + "source": "BLIND MEN", + "target": "BLIND MEN'S DOGS", + "weight": 8.0 + }, + { + "source": "BLIND MEN", + "target": "EBENEZER SCROOGE", + "weight": 7.0 + }, + { + "source": "BLIND MEN'S DOGS", + "target": "EBENEZER SCROOGE", + "weight": 7.0 + }, + { + "source": "THE COURT", + "target": "COUNTING-HOUSE", + "weight": 6.0 + }, + { + "source": "NEIGHBOURING OFFICES", + "target": "COUNTING-HOUSE", + "weight": 6.0 + }, + { + "source": "THE HOUSES OPPOSITE", + "target": "COUNTING-HOUSE", + "weight": 5.0 + }, + { + "source": "NATURE", + "target": "THE COURT", + "weight": 1.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "JACOB MARLEY", + "weight": 43.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "BOB CRATCHIT", + "weight": 26.0 + }, + { + "source": "SCROOGE'S NEPHEW", + "target": "CHRISTMAS", + "weight": 16.0 + }, + { + "source": "BOB CRATCHIT", + "target": "CHRISTMAS", + "weight": 6.0 + }, + { + "source": "PORTLY GENTLEMEN", + "target": "EBENEZER SCROOGE", + "weight": 6.0 + }, + { + "source": "PORTLY GENTLEMEN", + "target": "CHRISTMAS", + "weight": 6.0 + }, + { + "source": "BEDLAM", + "target": "EBENEZER SCROOGE", + "weight": 3.0 + }, + { + "source": "SCROOGE'S NEPHEW", + "target": "NEW YEAR", + "weight": 1.0 + }, + { + "source": "SCROOGE'S OFFICE", + "target": "SCROOGE AND MARLEY", + "weight": 8.0 + }, + { + "source": "SCROOGE'S CLERK", + "target": "BOB CRATCHIT", + "weight": 10.0 + }, + { + "source": "SCROOGE'S CLERK", + "target": "SCROOGE'S WIFE", + "weight": 8.0 + }, + { + "source": "SCROOGE'S CLERK", + "target": "SCROOGE'S FAMILY", + "weight": 8.0 + }, + { + "source": "PORTLY GENTLEMEN", + "target": "CREDENTIALS", + "weight": 7.0 + }, + { + "source": "PORTLY GENTLEMEN", + "target": "POOR AND DESTITUTE", + "weight": 9.0 + }, + { + "source": "FESTIVE SEASON", + "target": "CHRISTMAS", + "weight": 8.0 + }, + { + "source": "FESTIVE SEASON", + "target": "NEW YEAR", + 
"weight": 8.0 + }, + { + "source": "SCROOGE'S NEPHEW", + "target": "PARLIAMENT", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "GENTLEMAN", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "CLERK", + "weight": 9.0 + }, + { + "source": "SCROOGE", + "target": "COUNTING-HOUSE", + "weight": 27.0 + }, + { + "source": "SCROOGE", + "target": "UNION WORKHOUSES", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "PRISONS", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "TREADMILL", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "POOR LAW", + "weight": 6.0 + }, + { + "source": "LORD MAYOR", + "target": "MANSION HOUSE", + "weight": 9.0 + }, + { + "source": "LORD MAYOR", + "target": "TAILOR", + "weight": 7.0 + }, + { + "source": "TAILOR", + "target": "GARRET", + "weight": 8.0 + }, + { + "source": "SINGER", + "target": "SCROOGE", + "weight": 7.0 + }, + { + "source": "CHRISTMAS", + "target": "CHRISTMAS CAROL", + "weight": 8.0 + }, + { + "source": "MAIN STREET", + "target": "COURT", + "weight": 7.0 + }, + { + "source": "CHURCH", + "target": "SCROOGE", + "weight": 5.0 + }, + { + "source": "ST. 
DUNSTAN", + "target": "EVIL SPIRIT", + "weight": 1.0 + }, + { + "source": "POOR", + "target": "DESTIUTE", + "weight": 9.0 + }, + { + "source": "GENTLEMAN", + "target": "POOR", + "weight": 8.0 + }, + { + "source": "GENTLEMAN", + "target": "DESTIUTE", + "weight": 8.0 + }, + { + "source": "LABOURERS", + "target": "MAIN STREET", + "weight": 9.0 + }, + { + "source": "RAGGED MEN AND BOYS", + "target": "LABOURERS", + "weight": 8.0 + }, + { + "source": "RAGGED MEN AND BOYS", + "target": "MAIN STREET", + "weight": 8.0 + }, + { + "source": "PEOPLE", + "target": "MAIN STREET", + "weight": 7.0 + }, + { + "source": "FIFTY COOKS AND BUTLERS", + "target": "LORD MAYOR", + "weight": 9.0 + }, + { + "source": "TAILOR'S WIFE", + "target": "TAILOR", + "weight": 9.0 + }, + { + "source": "BABY", + "target": "TAILOR'S WIFE", + "weight": 9.0 + }, + { + "source": "TAILOR'S WIFE", + "target": "BABY", + "weight": 9.0 + }, + { + "source": "TAILOR'S WIFE", + "target": "GARRET", + "weight": 8.0 + }, + { + "source": "SHOPS", + "target": "MAIN STREET", + "weight": 7.0 + }, + { + "source": "POULTERERS", + "target": "SHOPS", + "weight": 8.0 + }, + { + "source": "GROCERS", + "target": "SHOPS", + "weight": 8.0 + }, + { + "source": "ESTABLISHMENTS", + "target": "SCROOGE", + "weight": 7.0 + }, + { + "source": "CITY", + "target": "MAIN STREET", + "weight": 8.0 + }, + { + "source": "CITY", + "target": "COURT", + "weight": 8.0 + }, + { + "source": "MONDAY", + "target": "TAILOR", + "weight": 8.0 + }, + { + "source": "PREVIOUS MONDAY", + "target": "TAILOR", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "BOB CRATCHIT", + "weight": 34.0 + }, + { + "source": "BOB CRATCHIT", + "target": "COUNTING-HOUSE", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "CITY OF LONDON", + "weight": 6.0 + }, + { + "source": "BOB CRATCHIT", + "target": "CORNHILL", + "weight": 5.0 + }, + { + "source": "BOB CRATCHIT", + "target": "CAMDEN TOWN", + "weight": 6.0 + }, + { + "source": "BOB CRATCHIT", + "target": 
"CHRISTMAS EVE", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "CHRISTMAS EVE", + "weight": 6.0 + }, + { + "source": "CITY OF LONDON", + "target": "CORPORATION, ALDERMEN, AND LIVERY", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "GENIUS OF THE WEATHER", + "weight": 4.0 + }, + { + "source": "SCROOGE", + "target": "SCROOGE'S CHAMBERS", + "weight": 10.0 + }, + { + "source": "SCROOGE'S CHAMBERS", + "target": "YARD (SCROOGE'S HOUSE)", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "TAVERN", + "weight": 6.0 + }, + { + "source": "SCROOGE'S CHAMBERS", + "target": "WINE-MERCHANT'S CELLARS", + "weight": 5.0 + }, + { + "source": "SCROOGE", + "target": "BANKER'S BOOK", + "weight": 5.0 + }, + { + "source": "SCROOGE'S CHAMBERS", + "target": "COUNTING-HOUSE", + "weight": 4.0 + }, + { + "source": "SCROOGE'S CHAMBERS", + "target": "CITY OF LONDON", + "weight": 7.0 + }, + { + "source": "TAVERN", + "target": "CITY OF LONDON", + "weight": 4.0 + }, + { + "source": "YARD (SCROOGE'S HOUSE)", + "target": "CITY OF LONDON", + "weight": 4.0 + }, + { + "source": "WINE-MERCHANT'S CELLARS", + "target": "CITY OF LONDON", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "SCROOGE'S HOUSE", + "weight": 10.0 + }, + { + "source": "SCROOGE", + "target": "MARLEY'S GHOST", + "weight": 10.0 + }, + { + "source": "MARLEY", + "target": "MARLEY'S GHOST", + "weight": 10.0 + }, + { + "source": "MARLEY'S GHOST", + "target": "WINE-MERCHANT'S CELLAR", + "weight": 7.0 + }, + { + "source": "DUTCH MERCHANT", + "target": "SCROOGE'S HOUSE", + "weight": 5.0 + }, + { + "source": "SCRIPTURES", + "target": "SCROOGE'S HOUSE", + "weight": 6.0 + }, + { + "source": "DUTCH MERCHANT", + "target": "SCRIPTURES", + "weight": 1.0 + }, + { + "source": "CAIN", + "target": "ABEL", + "weight": 8.0 + }, + { + "source": "PHARAOH'S DAUGHTERS", + "target": "SCRIPTURES", + "weight": 7.0 + }, + { + "source": "QUEENS OF SHEBA", + "target": "SCRIPTURES", + "weight": 7.0 + }, + { + "source": 
"ANGELIC MESSENGERS", + "target": "SCRIPTURES", + "weight": 7.0 + }, + { + "source": "ABRAHAM", + "target": "SCRIPTURES", + "weight": 7.0 + }, + { + "source": "BELSHAZZAR", + "target": "SCRIPTURES", + "weight": 7.0 + }, + { + "source": "APOSTLES", + "target": "SCRIPTURES", + "weight": 7.0 + }, + { + "source": "STREET", + "target": "SCROOGE'S HOUSE", + "weight": 6.0 + }, + { + "source": "ACT OF PARLIAMENT", + "target": "SCROOGE", + "weight": 3.0 + }, + { + "source": "HEARSE", + "target": "SCROOGE", + "weight": 5.0 + }, + { + "source": "BEDROOM", + "target": "SCROOGE'S HOUSE", + "weight": 8.0 + }, + { + "source": "SITTING-ROOM", + "target": "SCROOGE'S HOUSE", + "weight": 8.0 + }, + { + "source": "LUMBER-ROOM", + "target": "SCROOGE'S HOUSE", + "weight": 8.0 + }, + { + "source": "CLOSET", + "target": "SCROOGE'S HOUSE", + "weight": 8.0 + }, + { + "source": "CHAMBER", + "target": "BUILDING", + "weight": 7.0 + }, + { + "source": "BUILDING", + "target": "SCROOGE'S HOUSE", + "weight": 1.0 + }, + { + "source": "JACOB MARLEY", + "target": "MARLEY'S GHOST", + "weight": 20.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "MARLEY'S GHOST", + "weight": 10.0 + }, + { + "source": "SCROOGE'S ROOM", + "target": "EBENEZER SCROOGE", + "weight": 10.0 + }, + { + "source": "SCROOGE'S ROOM", + "target": "JACOB MARLEY", + "weight": 8.0 + }, + { + "source": "FIREPLACE", + "target": "EBENEZER SCROOGE", + "weight": 7.0 + }, + { + "source": "FIREPLACE", + "target": "JACOB MARLEY", + "weight": 7.0 + }, + { + "source": "CHAIR", + "target": "JACOB MARLEY", + "weight": 6.0 + }, + { + "source": "CHAIN", + "target": "JACOB MARLEY", + "weight": 10.0 + }, + { + "source": "BANDAGE", + "target": "JACOB MARLEY", + "weight": 8.0 + }, + { + "source": "SPECTRE'S CRY", + "target": "JACOB MARLEY", + "weight": 8.0 + }, + { + "source": "SPECTRE'S CRY", + "target": "EBENEZER SCROOGE", + "weight": 1.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "THE GHOST OF JACOB MARLEY", + "weight": 10.0 + }, + { 
+ "source": "JACOB MARLEY", + "target": "THE GHOST OF JACOB MARLEY", + "weight": 10.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "THE COUNTING-HOUSE", + "weight": 8.0 + }, + { + "source": "JACOB MARLEY", + "target": "THE COUNTING-HOUSE", + "weight": 8.0 + }, + { + "source": "THE GHOST OF JACOB MARLEY", + "target": "CHRISTMAS EVE", + "weight": 7.0 + }, + { + "source": "THE GHOST OF JACOB MARLEY", + "target": "THE STAR", + "weight": 6.0 + }, + { + "source": "THE GHOST OF JACOB MARLEY", + "target": "THE WARD", + "weight": 1.0 + }, + { + "source": "WISE MEN", + "target": "THE STAR", + "weight": 8.0 + }, + { + "source": "FELLOW-MEN", + "target": "EBENEZER SCROOGE", + "weight": 7.0 + }, + { + "source": "MANKIND", + "target": "JACOB MARLEY", + "weight": 8.0 + }, + { + "source": "CHRISTIAN SPIRIT", + "target": "FELLOW-MEN", + "weight": 7.0 + }, + { + "source": "POOR HOMES", + "target": "THE STAR", + "weight": 6.0 + }, + { + "source": "EARTH", + "target": "THE GHOST OF JACOB MARLEY", + "weight": 9.0 + }, + { + "source": "ETERNITY", + "target": "THE GHOST OF JACOB MARLEY", + "weight": 1.0 + }, + { + "source": "JACOB MARLEY", + "target": "EBENEZER SCROOGE", + "weight": 9.0 + }, + { + "source": "MARLEY'S GHOST", + "target": "EBENEZER SCROOGE", + "weight": 9.0 + }, + { + "source": "THREE SPIRITS", + "target": "EBENEZER SCROOGE", + "weight": 8.0 + }, + { + "source": "PHANTOMS", + "target": "EBENEZER SCROOGE", + "weight": 7.0 + }, + { + "source": "CHURCH", + "target": "EBENEZER SCROOGE", + "weight": 3.0 + }, + { + "source": "INVISIBLE WORLD", + "target": "EBENEZER SCROOGE", + "weight": 6.0 + }, + { + "source": "THE FIRST OF THE THREE SPIRITS", + "target": "EBENEZER SCROOGE", + "weight": 8.0 + }, + { + "source": "THREE SPIRITS", + "target": "THE FIRST OF THE THREE SPIRITS", + "weight": 1.0 + }, + { + "source": "OLD GHOST IN WHITE WAISTCOAT", + "target": "WRETCHED WOMAN", + "weight": 7.0 + }, + { + "source": "WRETCHED WOMAN", + "target": "INFANT", + "weight": 8.0 + }, + { 
+ "source": "OLD GHOST IN WHITE WAISTCOAT", + "target": "SCROOGE", + "weight": 5.0 + }, + { + "source": "PHANTOMS", + "target": "GUILTY GOVERNMENTS", + "weight": 6.0 + }, + { + "source": "PHANTOMS", + "target": "WINDOW", + "weight": 7.0 + }, + { + "source": "MARLEY'S GHOST", + "target": "DOOR", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "SCROOGE'S CHAMBER", + "weight": 8.0 + }, + { + "source": "MARLEY'S GHOST", + "target": "NIGHT", + "weight": 7.0 + }, + { + "source": "BELL", + "target": "THREE SPIRITS", + "weight": 8.0 + }, + { + "source": "CLOCK", + "target": "SCROOGE", + "weight": 1.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "THE GHOST OF CHRISTMAS PAST", + "weight": 10.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "SCROOGE'S BEDROOM", + "weight": 8.0 + }, + { + "source": "THE GHOST OF CHRISTMAS PAST", + "target": "SCROOGE'S BEDROOM", + "weight": 8.0 + }, + { + "source": "THE FIRST OF EXCHANGE", + "target": "UNITED STATES SECURITY", + "weight": 5.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "THE FIRST OF EXCHANGE", + "weight": 1.0 + }, + { + "source": "THE CLOCK", + "target": "EBENEZER SCROOGE", + "weight": 7.0 + }, + { + "source": "THE BELL", + "target": "EBENEZER SCROOGE", + "weight": 8.0 + }, + { + "source": "THE CURTAINS", + "target": "THE GHOST OF CHRISTMAS PAST", + "weight": 9.0 + }, + { + "source": "THE SUN", + "target": "EBENEZER SCROOGE", + "weight": 5.0 + }, + { + "source": "THE WORLD", + "target": "EBENEZER SCROOGE", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "GHOST OF CHRISTMAS PAST", + "weight": 19.0 + }, + { + "source": "SCROOGE", + "target": "CITY", + "weight": 24.0 + }, + { + "source": "SCROOGE", + "target": "MARKET-TOWN", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "SCHOOL", + "weight": 24.0 + }, + { + "source": "MARKET-TOWN", + "target": "SCHOOL", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "CHRISTMAS", + "weight": 61.0 + }, + { + "source": "GHOST 
OF CHRISTMAS PAST", + "target": "CHRISTMAS", + "weight": 1.0 + }, + { + "source": "BOYS", + "target": "FARMERS", + "weight": 6.0 + }, + { + "source": "BOYS", + "target": "MARKET-TOWN", + "weight": 7.0 + }, + { + "source": "BOYS", + "target": "CHRISTMAS", + "weight": 8.0 + }, + { + "source": "FRIENDS", + "target": "SCHOOL", + "weight": 7.0 + }, + { + "source": "ROAD", + "target": "FIELDS", + "weight": 8.0 + }, + { + "source": "ROAD", + "target": "LANE", + "weight": 7.0 + }, + { + "source": "MARKET-TOWN", + "target": "BRIDGE", + "weight": 8.0 + }, + { + "source": "MARKET-TOWN", + "target": "CHURCH", + "weight": 8.0 + }, + { + "source": "MARKET-TOWN", + "target": "RIVER", + "weight": 8.0 + }, + { + "source": "NIGHT", + "target": "SCROOGE", + "weight": 8.0 + }, + { + "source": "NIGHT", + "target": "GHOST OF CHRISTMAS PAST", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "GHOST", + "weight": 34.0 + }, + { + "source": "SCROOGE", + "target": "MANSION", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "ALI BABA", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "VALENTINE", + "weight": 5.0 + }, + { + "source": "SCROOGE", + "target": "ORSON", + "weight": 5.0 + }, + { + "source": "SCROOGE", + "target": "SULTAN'S GROOM", + "weight": 4.0 + }, + { + "source": "SCROOGE", + "target": "GENII", + "weight": 4.0 + }, + { + "source": "SCROOGE", + "target": "PRINCESS", + "weight": 3.0 + }, + { + "source": "SCROOGE", + "target": "ROBIN CRUSOE", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "PARROT", + "weight": 4.0 + }, + { + "source": "SCROOGE", + "target": "FRIDAY", + "weight": 4.0 + }, + { + "source": "SCROOGE", + "target": "DAMASCUS", + "weight": 3.0 + }, + { + "source": "SCROOGE", + "target": "CHRISTMAS CAROL", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "JOLLY HOLIDAYS", + "weight": 6.0 + }, + { + "source": "VALENTINE", + "target": "ORSON", + "weight": 8.0 + }, + { + "source": "SULTAN'S GROOM", + "target": "GENII", + 
"weight": 7.0 + }, + { + "source": "SULTAN'S GROOM", + "target": "PRINCESS", + "weight": 6.0 + }, + { + "source": "ROBIN CRUSOE", + "target": "PARROT", + "weight": 8.0 + }, + { + "source": "ROBIN CRUSOE", + "target": "FRIDAY", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "SOLITARY CHILD", + "weight": 10.0 + }, + { + "source": "SCROOGE", + "target": "BOY SINGING AT SCROOGE'S DOOR", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "SPIRIT", + "weight": 36.0 + }, + { + "source": "SCROOGE", + "target": "OFFICES", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "STABLES", + "weight": 5.0 + }, + { + "source": "SCROOGE", + "target": "COACH-HOUSES", + "weight": 5.0 + }, + { + "source": "SCROOGE", + "target": "SHEDS", + "weight": 5.0 + }, + { + "source": "SCROOGE", + "target": "STOREHOUSE", + "weight": 5.0 + }, + { + "source": "SCROOGE", + "target": "HIGH-ROAD", + "weight": 4.0 + }, + { + "source": "SCROOGE", + "target": "LANE", + "weight": 4.0 + }, + { + "source": "SCROOGE", + "target": "YARD", + "weight": 4.0 + }, + { + "source": "SCROOGE", + "target": "HALL", + "weight": 4.0 + }, + { + "source": "SCROOGE", + "target": "ROOM", + "weight": 8.0 + }, + { + "source": "ROBIN CRUSOE", + "target": "ISLAND", + "weight": 7.0 + }, + { + "source": "FRIDAY", + "target": "CREEK", + "weight": 7.0 + }, + { + "source": "MANSION", + "target": "OFFICES", + "weight": 8.0 + }, + { + "source": "MANSION", + "target": "STABLES", + "weight": 8.0 + }, + { + "source": "MANSION", + "target": "COACH-HOUSES", + "weight": 8.0 + }, + { + "source": "MANSION", + "target": "SHEDS", + "weight": 8.0 + }, + { + "source": "MANSION", + "target": "STOREHOUSE", + "weight": 8.0 + }, + { + "source": "MANSION", + "target": "YARD", + "weight": 8.0 + }, + { + "source": "MANSION", + "target": "HALL", + "weight": 8.0 + }, + { + "source": "MANSION", + "target": "ROOM", + "weight": 8.0 + }, + { + "source": "SCHOOL", + "target": "SOLITARY CHILD", + "weight": 9.0 + }, + { + "source": 
"SPIRIT", + "target": "GHOST", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "FAN", + "weight": 9.0 + }, + { + "source": "FAN", + "target": "SCROOGE'S NEPHEW", + "weight": 10.0 + }, + { + "source": "SCROOGE", + "target": "SCROOGE'S NEPHEW", + "weight": 23.0 + }, + { + "source": "SCROOGE", + "target": "SCHOOLMASTER", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "DICK WILKINS", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "FEZZIWIG", + "weight": 8.0 + }, + { + "source": "FEZZIWIG", + "target": "WAREHOUSE", + "weight": 10.0 + }, + { + "source": "SCROOGE", + "target": "WAREHOUSE", + "weight": 8.0 + }, + { + "source": "DICK WILKINS", + "target": "WAREHOUSE", + "weight": 8.0 + }, + { + "source": "SCHOOLMASTER", + "target": "SCHOOL", + "weight": 10.0 + }, + { + "source": "FAN", + "target": "CHRISTMAS", + "weight": 8.0 + }, + { + "source": "FEZZIWIG", + "target": "CHRISTMAS", + "weight": 1.0 + }, + { + "source": "SCHOOLMASTER", + "target": "SERVANT", + "weight": 7.0 + }, + { + "source": "SERVANT", + "target": "POSTBOY", + "weight": 6.0 + }, + { + "source": "FATHER", + "target": "FAN", + "weight": 9.0 + }, + { + "source": "FATHER", + "target": "SCROOGE", + "weight": 9.0 + }, + { + "source": "POSTBOY", + "target": "CHAIR/COACH", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "CHAIR/COACH", + "weight": 7.0 + }, + { + "source": "FAN", + "target": "CHAIR/COACH", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "PARLOUR", + "weight": 6.0 + }, + { + "source": "FAN", + "target": "PARLOUR", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "GARDEN SWEEP", + "weight": 5.0 + }, + { + "source": "FAN", + "target": "GARDEN SWEEP", + "weight": 1.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "DICK WILKINS", + "weight": 8.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "FEZZIWIG", + "weight": 9.0 + }, + { + "source": "DICK WILKINS", + "target": "FEZZIWIG", + "weight": 8.0 + }, + { + "source": 
"FEZZIWIG", + "target": "MISS FEZZIWIGS", + "weight": 7.0 + }, + { + "source": "FEZZIWIG", + "target": "FEZZIWIG'S WAREHOUSE", + "weight": 10.0 + }, + { + "source": "FEZZIWIG'S WAREHOUSE", + "target": "CHRISTMAS EVE PARTY AT FEZZIWIG'S", + "weight": 10.0 + }, + { + "source": "FEZZIWIG", + "target": "CHRISTMAS EVE PARTY AT FEZZIWIG'S", + "weight": 10.0 + }, + { + "source": "MRS. FEZZIWIG", + "target": "CHRISTMAS EVE PARTY AT FEZZIWIG'S", + "weight": 8.0 + }, + { + "source": "MISS FEZZIWIGS", + "target": "CHRISTMAS EVE PARTY AT FEZZIWIG'S", + "weight": 7.0 + }, + { + "source": "THE FIDDLER", + "target": "CHRISTMAS EVE PARTY AT FEZZIWIG'S", + "weight": 8.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "CHRISTMAS EVE PARTY AT FEZZIWIG'S", + "weight": 8.0 + }, + { + "source": "DICK WILKINS", + "target": "CHRISTMAS EVE PARTY AT FEZZIWIG'S", + "weight": 1.0 + }, + { + "source": "THE GHOST", + "target": "EBENEZER SCROOGE", + "weight": 18.0 + }, + { + "source": "THE HOUSEMAID", + "target": "THE BAKER", + "weight": 7.0 + }, + { + "source": "THE COOK", + "target": "THE MILKMAN", + "weight": 7.0 + }, + { + "source": "THE BOY FROM OVER THE WAY", + "target": "THE GIRL FROM NEXT DOOR BUT ONE", + "weight": 6.0 + }, + { + "source": "YOUNG MEN AND WOMEN EMPLOYED IN THE BUSINESS", + "target": "FEZZIWIG'S WAREHOUSE", + "weight": 10.0 + }, + { + "source": "THE THREE MISS FEZZIWIGS' SIX YOUNG FOLLOWERS", + "target": "MISS FEZZIWIGS", + "weight": 7.0 + }, + { + "source": "THE HOUSEMAID", + "target": "CHRISTMAS EVE PARTY AT FEZZIWIG'S", + "weight": 7.0 + }, + { + "source": "THE BAKER", + "target": "CHRISTMAS EVE PARTY AT FEZZIWIG'S", + "weight": 7.0 + }, + { + "source": "THE COOK", + "target": "CHRISTMAS EVE PARTY AT FEZZIWIG'S", + "weight": 7.0 + }, + { + "source": "THE MILKMAN", + "target": "CHRISTMAS EVE PARTY AT FEZZIWIG'S", + "weight": 7.0 + }, + { + "source": "THE BOY FROM OVER THE WAY", + "target": "CHRISTMAS EVE PARTY AT FEZZIWIG'S", + "weight": 7.0 + }, + { + "source": 
"THE GIRL FROM NEXT DOOR BUT ONE", + "target": "CHRISTMAS EVE PARTY AT FEZZIWIG'S", + "weight": 7.0 + }, + { + "source": "YOUNG MEN AND WOMEN EMPLOYED IN THE BUSINESS", + "target": "CHRISTMAS EVE PARTY AT FEZZIWIG'S", + "weight": 10.0 + }, + { + "source": "THE THREE MISS FEZZIWIGS' SIX YOUNG FOLLOWERS", + "target": "CHRISTMAS EVE PARTY AT FEZZIWIG'S", + "weight": 1.0 + }, + { + "source": "FEZZIWIG", + "target": "THE TWO APPRENTICES", + "weight": 8.0 + }, + { + "source": "FEZZIWIG", + "target": "DICK", + "weight": 7.0 + }, + { + "source": "FEZZIWIG", + "target": "THE DOMESTIC BALL", + "weight": 9.0 + }, + { + "source": "MRS. FEZZIWIG", + "target": "THE DOMESTIC BALL", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "DICK", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "THE GHOST OF CHRISTMAS PAST", + "weight": 9.0 + }, + { + "source": "SCROOGE", + "target": "THE FAIR YOUNG GIRL", + "weight": 8.0 + }, + { + "source": "THE DOMESTIC BALL", + "target": "CHRISTMAS", + "weight": 8.0 + }, + { + "source": "THE TWO APPRENTICES", + "target": "BACK-SHOP", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "BACK-SHOP", + "weight": 6.0 + }, + { + "source": "DICK", + "target": "BACK-SHOP", + "weight": 1.0 + }, + { + "source": "MR. FEZZIWIG", + "target": "YOUNG SCROOGE", + "weight": 8.0 + }, + { + "source": "MR. FEZZIWIG", + "target": "OLDER SCROOGE", + "weight": 7.0 + }, + { + "source": "MR. FEZZIWIG", + "target": "SPIRIT", + "weight": 6.0 + }, + { + "source": "YOUNG SCROOGE", + "target": "OLDER SCROOGE", + "weight": 9.0 + }, + { + "source": "OLDER SCROOGE", + "target": "SPIRIT", + "weight": 9.0 + }, + { + "source": "OLDER SCROOGE", + "target": "SCROOGE'S CLERK", + "weight": 7.0 + }, + { + "source": "MR. FEZZIWIG", + "target": "THE SHOP", + "weight": 8.0 + }, + { + "source": "MR. FEZZIWIG", + "target": "THE DOOR", + "weight": 7.0 + }, + { + "source": "MRS. 
FEZZIWIG", + "target": "THE DOOR", + "weight": 7.0 + }, + { + "source": "THE SHOP", + "target": "BACK-SHOP", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "THE GIRL", + "weight": 9.0 + }, + { + "source": "SCROOGE", + "target": "THE GHOST", + "weight": 17.0 + }, + { + "source": "THE GIRL", + "target": "THE MATRON", + "weight": 10.0 + }, + { + "source": "THE MATRON", + "target": "THE DAUGHTER", + "weight": 10.0 + }, + { + "source": "THE MATRON", + "target": "THE FATHER", + "weight": 9.0 + }, + { + "source": "THE FATHER", + "target": "CHRISTMAS", + "weight": 7.0 + }, + { + "source": "THE MATRON", + "target": "HOME", + "weight": 7.0 + }, + { + "source": "THE DAUGHTER", + "target": "HOME", + "weight": 7.0 + }, + { + "source": "THE MATRON", + "target": "CHRISTMAS", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "HOME", + "weight": 1.0 + }, + { + "source": "CHILDREN", + "target": "THE MATRON", + "weight": 9.0 + }, + { + "source": "CHILDREN", + "target": "THE DAUGHTER", + "weight": 8.0 + }, + { + "source": "CHILDREN", + "target": "THE FATHER", + "weight": 9.0 + }, + { + "source": "CHILDREN", + "target": "PORTER", + "weight": 7.0 + }, + { + "source": "PORTER", + "target": "THE FATHER", + "weight": 6.0 + }, + { + "source": "WINTER FIRE", + "target": "HOME", + "weight": 7.0 + }, + { + "source": "WINTER FIRE", + "target": "CHRISTMAS", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "BELLE", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "JACOB MARLEY", + "weight": 20.0 + }, + { + "source": "JACOB MARLEY", + "target": "THE SECOND OF THE THREE SPIRITS", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "THE SECOND OF THE THREE SPIRITS", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "SCROOGE'S OFFICE", + "weight": 13.0 + }, + { + "source": "SCROOGE", + "target": "SCROOGE'S BEDROOM", + "weight": 7.0 + }, + { + "source": "CHRISTMAS", + "target": "CHRISTMAS TOYS AND PRESENTS", + "weight": 8.0 + }, + { + 
"source": "BELLE", + "target": "BELLE'S HUSBAND", + "weight": 9.0 + }, + { + "source": "BELLE'S HUSBAND", + "target": "BELLE'S DAUGHTER", + "weight": 9.0 + }, + { + "source": "BELLE", + "target": "BELLE'S DAUGHTER", + "weight": 9.0 + }, + { + "source": "BELLE", + "target": "BELLE'S FAMILY", + "weight": 9.0 + }, + { + "source": "BELLE'S HUSBAND", + "target": "BELLE'S FAMILY", + "weight": 9.0 + }, + { + "source": "BELLE'S DAUGHTER", + "target": "BELLE'S FAMILY", + "weight": 9.0 + }, + { + "source": "THE PORTER", + "target": "THE CHILDREN", + "weight": 8.0 + }, + { + "source": "THE BABY", + "target": "THE CHILDREN", + "weight": 7.0 + }, + { + "source": "THE HOUSE", + "target": "THE PARLOUR", + "weight": 7.0 + }, + { + "source": "THE HOUSE", + "target": "THE TOP OF THE HOUSE", + "weight": 7.0 + }, + { + "source": "THE FIRESIDE", + "target": "BELLE'S FAMILY", + "weight": 8.0 + }, + { + "source": "THE EXTINQUISHER-CAP", + "target": "GHOST", + "weight": 8.0 + }, + { + "source": "THE BELL", + "target": "THE SECOND OF THE THREE SPIRITS", + "weight": 8.0 + }, + { + "source": "THE PARLOUR", + "target": "THE CHILDREN", + "weight": 7.0 + }, + { + "source": "THE TOP OF THE HOUSE", + "target": "THE CHILDREN", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "GHOST OF CHRISTMAS PRESENT", + "weight": 23.0 + }, + { + "source": "SCROOGE", + "target": "SCROOGE'S ROOM", + "weight": 8.0 + }, + { + "source": "GHOST OF CHRISTMAS PRESENT", + "target": "SCROOGE'S ROOM", + "weight": 7.0 + }, + { + "source": "GHOST OF CHRISTMAS PRESENT", + "target": "CHRISTMAS", + "weight": 10.0 + }, + { + "source": "CITY", + "target": "SCROOGE", + "weight": 5.0 + }, + { + "source": "CITY", + "target": "GHOST OF CHRISTMAS PRESENT", + "weight": 1.0 + }, + { + "source": "HOLLY", + "target": "SCROOGE'S ROOM", + "weight": 7.0 + }, + { + "source": "MISTLETOE", + "target": "SCROOGE'S ROOM", + "weight": 7.0 + }, + { + "source": "IVY", + "target": "SCROOGE'S ROOM", + "weight": 7.0 + }, + { + "source": 
"PLENTY'S HORN", + "target": "GHOST OF CHRISTMAS PRESENT", + "weight": 6.0 + }, + { + "source": "CHIMNEY", + "target": "SCROOGE'S ROOM", + "weight": 6.0 + }, + { + "source": "BED", + "target": "SCROOGE'S ROOM", + "weight": 6.0 + }, + { + "source": "LOCK", + "target": "DOOR", + "weight": 5.0 + }, + { + "source": "DOOR", + "target": "SCROOGE'S ROOM", + "weight": 6.0 + }, + { + "source": "HEARTH", + "target": "SCROOGE'S ROOM", + "weight": 6.0 + }, + { + "source": "THRONE", + "target": "SCROOGE'S ROOM", + "weight": 6.0 + }, + { + "source": "ANTIQUE SCABBARD", + "target": "GHOST OF CHRISTMAS PRESENT", + "weight": 6.0 + }, + { + "source": "SPIRIT'S FAMILY", + "target": "GHOST OF CHRISTMAS PRESENT", + "weight": 8.0 + }, + { + "source": "YOUNGER MEMBERS OF SPIRIT'S FAMILY", + "target": "SPIRIT'S FAMILY", + "weight": 7.0 + }, + { + "source": "ELDER BROTHERS OF SPIRIT'S FAMILY", + "target": "SPIRIT'S FAMILY", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "SPIRIT'S FAMILY", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "CHRISTMAS MORNING", + "weight": 8.0 + }, + { + "source": "GROCERS", + "target": "CHRISTMAS MORNING", + "weight": 7.0 + }, + { + "source": "POULTERERS", + "target": "CHRISTMAS MORNING", + "weight": 7.0 + }, + { + "source": "FRUITERERS", + "target": "CHRISTMAS MORNING", + "weight": 7.0 + }, + { + "source": "GREAT BRITAIN", + "target": "CHRISTMAS MORNING", + "weight": 1.0 + }, + { + "source": "CITY STREETS", + "target": "GREAT BRITAIN", + "weight": 7.0 + }, + { + "source": "HOUSE-TOPS", + "target": "CITY STREETS", + "weight": 6.0 + }, + { + "source": "POULTERERS' SHOPS", + "target": "POULTERERS", + "weight": 8.0 + }, + { + "source": "FRUITERERS' SHOPS", + "target": "FRUITERERS", + "weight": 8.0 + }, + { + "source": "SHOPKEEPERS", + "target": "GROCERS", + "weight": 8.0 + }, + { + "source": "GROCER", + "target": "GROCERS", + "weight": 9.0 + }, + { + "source": "CUSTOMERS", + "target": "GROCERS", + "weight": 8.0 + }, + { + "source": 
"CUSTOMERS", + "target": "POULTERERS' SHOPS", + "weight": 7.0 + }, + { + "source": "CUSTOMERS", + "target": "FRUITERERS' SHOPS", + "weight": 7.0 + }, + { + "source": "BOYS", + "target": "CITY STREETS", + "weight": 7.0 + }, + { + "source": "CHRISTMAS MORNING", + "target": "CHRISTMAS", + "weight": 9.0 + }, + { + "source": "SCROOGE", + "target": "CITY STREETS", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "SHOPKEEPERS", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "CUSTOMERS", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "BOYS", + "weight": 1.0 + }, + { + "source": "SPIRIT", + "target": "BOB CRATCHIT", + "weight": 8.0 + }, + { + "source": "BOB CRATCHIT", + "target": "MRS. CRATCHIT", + "weight": 57.0 + }, + { + "source": "BOB CRATCHIT", + "target": "BELINDA CRATCHIT", + "weight": 22.0 + }, + { + "source": "BOB CRATCHIT", + "target": "PETER CRATCHIT", + "weight": 50.0 + }, + { + "source": "BOB CRATCHIT", + "target": "CRATCHIT FAMILY", + "weight": 10.0 + }, + { + "source": "MRS. 
CRATCHIT", + "target": "CRATCHIT FAMILY", + "weight": 10.0 + }, + { + "source": "BELINDA CRATCHIT", + "target": "CRATCHIT FAMILY", + "weight": 10.0 + }, + { + "source": "PETER CRATCHIT", + "target": "CRATCHIT FAMILY", + "weight": 10.0 + }, + { + "source": "CRATCHIT FAMILY", + "target": "BAKERS' SHOPS", + "weight": 7.0 + }, + { + "source": "BAKERS' SHOPS", + "target": "CHRISTMAS DAY", + "weight": 7.0 + }, + { + "source": "CHURCH", + "target": "CHRISTMAS DAY", + "weight": 7.0 + }, + { + "source": "SUBURBS OF THE TOWN", + "target": "CRATCHIT FAMILY", + "weight": 6.0 + }, + { + "source": "PARKS", + "target": "PETER CRATCHIT", + "weight": 5.0 + }, + { + "source": "SEVENTH DAY", + "target": "BAKERS' SHOPS", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "CRATCHIT FAMILY", + "weight": 8.0 + }, + { + "source": "GHOST OF CHRISTMAS PRESENT", + "target": "SPIRIT", + "weight": 10.0 + }, + { + "source": "DINNER-CARRIERS", + "target": "BAKERS' SHOPS", + "weight": 8.0 + }, + { + "source": "GROCER", + "target": "GROCER'S PEOPLE", + "weight": 9.0 + }, + { + "source": "GROCER", + "target": "TOWN", + "weight": 7.0 + }, + { + "source": "GROCER'S PEOPLE", + "target": "TOWN", + "weight": 7.0 + }, + { + "source": "CHAPEL", + "target": "CHURCH", + "weight": 8.0 + }, + { + "source": "CHAPEL", + "target": "TOWN", + "weight": 7.0 + }, + { + "source": "CHURCH", + "target": "TOWN", + "weight": 7.0 + }, + { + "source": "BOY CRATCHIT", + "target": "CRATCHIT FAMILY", + "weight": 10.0 + }, + { + "source": "GIRL CRATCHIT", + "target": "CRATCHIT FAMILY", + "weight": 10.0 + }, + { + "source": "BOY CRATCHIT", + "target": "GIRL CRATCHIT", + "weight": 8.0 + }, + { + "source": "BOY CRATCHIT", + "target": "PETER CRATCHIT", + "weight": 8.0 + }, + { + "source": "GIRL CRATCHIT", + "target": "PETER CRATCHIT", + "weight": 8.0 + }, + { + "source": "BOY CRATCHIT", + "target": "BELINDA CRATCHIT", + "weight": 8.0 + }, + { + "source": "GIRL CRATCHIT", + "target": "BELINDA CRATCHIT", + "weight": 8.0 + 
}, + { + "source": "BOY CRATCHIT", + "target": "MRS. CRATCHIT", + "weight": 10.0 + }, + { + "source": "GIRL CRATCHIT", + "target": "MRS. CRATCHIT", + "weight": 10.0 + }, + { + "source": "GHOST OF CHRISTMAS PRESENT", + "target": "DINNER-CARRIERS", + "weight": 8.0 + }, + { + "source": "GHOST OF CHRISTMAS PRESENT", + "target": "CRATCHIT FAMILY", + "weight": 8.0 + }, + { + "source": "TOWN", + "target": "SUBURBS OF THE TOWN", + "weight": 1.0 + }, + { + "source": "BOB CRATCHIT", + "target": "TINY TIM", + "weight": 78.0 + }, + { + "source": "MRS. CRATCHIT", + "target": "TINY TIM", + "weight": 35.0 + }, + { + "source": "BOB CRATCHIT", + "target": "MARTHA CRATCHIT", + "weight": 16.0 + }, + { + "source": "MRS. CRATCHIT", + "target": "MARTHA CRATCHIT", + "weight": 16.0 + }, + { + "source": "MRS. CRATCHIT", + "target": "PETER CRATCHIT", + "weight": 23.0 + }, + { + "source": "MRS. CRATCHIT", + "target": "BELINDA CRATCHIT", + "weight": 7.0 + }, + { + "source": "BOB CRATCHIT", + "target": "YOUNG CRATCHITS", + "weight": 15.0 + }, + { + "source": "MRS. 
CRATCHIT", + "target": "YOUNG CRATCHITS", + "weight": 7.0 + }, + { + "source": "CRATCHIT FAMILY", + "target": "CHRISTMAS DAY", + "weight": 10.0 + }, + { + "source": "CRATCHIT FAMILY", + "target": "CHRISTMAS DINNER", + "weight": 20.0 + }, + { + "source": "BOB CRATCHIT", + "target": "CHURCH", + "weight": 13.0 + }, + { + "source": "TINY TIM", + "target": "CHURCH", + "weight": 13.0 + }, + { + "source": "CHRISTMAS DAY", + "target": "CHRISTMAS DINNER", + "weight": 1.0 + }, + { + "source": "CRATCHIT FAMILY", + "target": "LONDON", + "weight": 8.0 + }, + { + "source": "CRATCHIT FAMILY", + "target": "BAKER'S SHOP", + "weight": 7.0 + }, + { + "source": "CRATCHIT FAMILY", + "target": "GOOSE", + "weight": 9.0 + }, + { + "source": "CRATCHIT FAMILY", + "target": "CHRISTMAS", + "weight": 18.0 + }, + { + "source": "CRATCHIT FAMILY", + "target": "PARKS", + "weight": 4.0 + }, + { + "source": "BAKER'S SHOP", + "target": "GOOSE", + "weight": 8.0 + }, + { + "source": "LONDON", + "target": "CHURCH", + "weight": 7.0 + }, + { + "source": "CHRISTMAS", + "target": "CHURCH", + "weight": 7.0 + }, + { + "source": "GOOSE", + "target": "CHRISTMAS DINNER", + "weight": 9.0 + }, + { + "source": "CRATCHIT FAMILY", + "target": "CHURCH", + "weight": 1.0 + }, + { + "source": "CRATCHIT FAMILY", + "target": "BOB CRATCHIT", + "weight": 29.0 + }, + { + "source": "CRATCHIT FAMILY", + "target": "MRS. CRATCHIT", + "weight": 29.0 + }, + { + "source": "CRATCHIT FAMILY", + "target": "TINY TIM", + "weight": 29.0 + }, + { + "source": "CRATCHIT FAMILY", + "target": "BELINDA CRATCHIT", + "weight": 8.0 + }, + { + "source": "CRATCHIT FAMILY", + "target": "YOUNG CRATCHITS", + "weight": 8.0 + }, + { + "source": "CHRISTMAS DINNER", + "target": "CHRISTMAS", + "weight": 9.0 + }, + { + "source": "MRS. 
CRATCHIT", + "target": "BACK-YARD", + "weight": 5.0 + }, + { + "source": "CRATCHIT FAMILY", + "target": "HEARTH", + "weight": 8.0 + }, + { + "source": "TINY TIM", + "target": "POOR CHIMNEY CORNER", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "TINY TIM", + "weight": 9.0 + }, + { + "source": "THE GHOST", + "target": "TINY TIM", + "weight": 8.0 + }, + { + "source": "THE GHOST", + "target": "FUTURE", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "FUTURE", + "weight": 1.0 + }, + { + "source": "MR. SCROOGE", + "target": "FOUNDER OF THE FEAST", + "weight": 8.0 + }, + { + "source": "COPPER", + "target": "CHRISTMAS PUDDING", + "weight": 7.0 + }, + { + "source": "EATING-HOUSE", + "target": "PASTRY-COOK'S", + "weight": 5.0 + }, + { + "source": "PASTRY-COOK'S", + "target": "LAUNDRESS'S", + "weight": 5.0 + }, + { + "source": "CHRISTMAS HOLLY", + "target": "CHRISTMAS PUDDING", + "weight": 7.0 + }, + { + "source": "JUG", + "target": "GLASS", + "weight": 6.0 + }, + { + "source": "GLASS", + "target": "CUSTARD CUP", + "weight": 6.0 + }, + { + "source": "CHESTNUTS", + "target": "HEARTH", + "weight": 7.0 + }, + { + "source": "APPLE SAUCE", + "target": "CHRISTMAS GOOSE", + "weight": 6.0 + }, + { + "source": "MASHED POTATOES", + "target": "CHRISTMAS GOOSE", + "weight": 6.0 + }, + { + "source": "CHRISTMAS PUDDING", + "target": "CHRISTMAS DINNER", + "weight": 8.0 + }, + { + "source": "CHRISTMAS GOOSE", + "target": "CHRISTMAS DINNER", + "weight": 8.0 + }, + { + "source": "SURPLUS POPULATION", + "target": "SCROOGE", + "weight": 7.0 + }, + { + "source": "SURPLUS POPULATION", + "target": "THE GHOST", + "weight": 1.0 + }, + { + "source": "TINY TIM", + "target": "THE CRATCHIT FAMILY", + "weight": 10.0 + }, + { + "source": "PETER CRATCHIT", + "target": "THE CRATCHIT FAMILY", + "weight": 10.0 + }, + { + "source": "MARTHA CRATCHIT", + "target": "THE CRATCHIT FAMILY", + "weight": 10.0 + }, + { + "source": "THE CRATCHIT FAMILY", + "target": "THE FEAST", + "weight": 10.0 
+ }, + { + "source": "SCROOGE", + "target": "THE CRATCHIT FAMILY", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "CHRISTMAS DAY", + "weight": 7.0 + }, + { + "source": "THE SPIRIT", + "target": "SCROOGE", + "weight": 10.0 + }, + { + "source": "THE SPIRIT", + "target": "THE CRATCHIT FAMILY", + "weight": 8.0 + }, + { + "source": "MARTHA CRATCHIT", + "target": "THE MILLINER'S", + "weight": 7.0 + }, + { + "source": "THE LAMPLIGHTER", + "target": "THE STREET", + "weight": 6.0 + }, + { + "source": "THE STREET", + "target": "CHRISTMAS DAY", + "weight": 1.0 + }, + { + "source": "MARTHA CRATCHIT", + "target": "COUNTESS", + "weight": 3.0 + }, + { + "source": "MARTHA CRATCHIT", + "target": "LORD", + "weight": 3.0 + }, + { + "source": "LORD", + "target": "PETER CRATCHIT", + "weight": 2.0 + }, + { + "source": "BOB CRATCHIT", + "target": "MASTER PETER", + "weight": 8.0 + }, + { + "source": "THE CHILDREN", + "target": "THE CRATCHIT FAMILY", + "weight": 10.0 + }, + { + "source": "GUESTS", + "target": "HOUSE", + "weight": 7.0 + }, + { + "source": "HANDSOME GIRLS", + "target": "NEIGHBOUR", + "weight": 6.0 + }, + { + "source": "HANDSOME GIRLS", + "target": "SINGLE MAN", + "weight": 2.0 + }, + { + "source": "SPIRIT'S TORCH", + "target": "THE CRATCHIT FAMILY", + "weight": 7.0 + }, + { + "source": "TINY TIM", + "target": "SONG ABOUT A LOST CHILD", + "weight": 7.0 + }, + { + "source": "PETER CRATCHIT", + "target": "PAWNBROKER'S", + "weight": 3.0 + }, + { + "source": "KITCHENS", + "target": "THE STREET", + "weight": 5.0 + }, + { + "source": "PARLOURS", + "target": "THE STREET", + "weight": 5.0 + }, + { + "source": "ROOMS", + "target": "THE STREET", + "weight": 5.0 + }, + { + "source": "HOUSE", + "target": "THE STREET", + "weight": 5.0 + }, + { + "source": "WINDOW-BLINDS", + "target": "HOUSE", + "weight": 4.0 + }, + { + "source": "SNOW", + "target": "THE STREET", + "weight": 5.0 + }, + { + "source": "EVENING", + "target": "THE LAMPLIGHTER", + "weight": 6.0 + }, + { + "source": 
"FIRE", + "target": "KITCHENS", + "weight": 5.0 + }, + { + "source": "FIRE", + "target": "PARLOURS", + "weight": 5.0 + }, + { + "source": "FIRE", + "target": "ROOMS", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "THE SPIRIT", + "weight": 9.0 + }, + { + "source": "THE SPIRIT", + "target": "MINERS", + "weight": 6.0 + }, + { + "source": "MINERS", + "target": "OLD MAN MINER", + "weight": 8.0 + }, + { + "source": "MINERS", + "target": "MOOR", + "weight": 8.0 + }, + { + "source": "THE SPIRIT", + "target": "LIGHTHOUSE KEEPERS", + "weight": 6.0 + }, + { + "source": "LIGHTHOUSE KEEPERS", + "target": "ELDER LIGHTHOUSE KEEPER", + "weight": 8.0 + }, + { + "source": "LIGHTHOUSE KEEPERS", + "target": "LIGHTHOUSE", + "weight": 9.0 + }, + { + "source": "THE SPIRIT", + "target": "SHIP CREW", + "weight": 6.0 + }, + { + "source": "SHIP CREW", + "target": "SHIP", + "weight": 9.0 + }, + { + "source": "MINERS", + "target": "CHRISTMAS", + "weight": 8.0 + }, + { + "source": "LIGHTHOUSE KEEPERS", + "target": "CHRISTMAS", + "weight": 8.0 + }, + { + "source": "SHIP CREW", + "target": "CHRISTMAS", + "weight": 8.0 + }, + { + "source": "OLD MAN MINER", + "target": "OLD WOMAN MINER", + "weight": 9.0 + }, + { + "source": "OLD MAN MINER", + "target": "CHILDREN OF MINERS", + "weight": 8.0 + }, + { + "source": "OLD WOMAN MINER", + "target": "CHILDREN OF MINERS", + "weight": 8.0 + }, + { + "source": "MINERS", + "target": "CHILDREN OF MINERS", + "weight": 8.0 + }, + { + "source": "MINERS", + "target": "OLD WOMAN MINER", + "weight": 8.0 + }, + { + "source": "MINERS", + "target": "BURIAL-PLACE OF GIANTS", + "weight": 7.0 + }, + { + "source": "MOOR", + "target": "BURIAL-PLACE OF GIANTS", + "weight": 8.0 + }, + { + "source": "MOOR", + "target": "WEST", + "weight": 5.0 + }, + { + "source": "LIGHTHOUSE", + "target": "REEF OF SUNKEN ROCKS", + "weight": 9.0 + }, + { + "source": "LIGHTHOUSE", + "target": "SEA", + "weight": 8.0 + }, + { + "source": "LIGHTHOUSE KEEPERS", + "target": "SEA", + 
"weight": 8.0 + }, + { + "source": "SHIP", + "target": "SEA", + "weight": 9.0 + }, + { + "source": "SHIP CREW", + "target": "OFFICERS", + "weight": 8.0 + }, + { + "source": "SHIP CREW", + "target": "HELMSMAN", + "weight": 8.0 + }, + { + "source": "SHIP CREW", + "target": "LOOK-OUT", + "weight": 8.0 + }, + { + "source": "OFFICERS", + "target": "CHRISTMAS DAY", + "weight": 7.0 + }, + { + "source": "HELMSMAN", + "target": "CHRISTMAS DAY", + "weight": 7.0 + }, + { + "source": "LOOK-OUT", + "target": "CHRISTMAS DAY", + "weight": 7.0 + }, + { + "source": "MINERS", + "target": "CHRISTMAS SONG", + "weight": 8.0 + }, + { + "source": "OLD MAN MINER", + "target": "CHRISTMAS SONG", + "weight": 8.0 + }, + { + "source": "CHRISTMAS SONG", + "target": "CHRISTMAS", + "weight": 8.0 + }, + { + "source": "CHRISTMAS DAY", + "target": "CHRISTMAS", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "FRED", + "weight": 17.0 + }, + { + "source": "FRED", + "target": "SCROOGE'S NIECE", + "weight": 16.0 + }, + { + "source": "SCROOGE'S NIECE", + "target": "SCROOGE", + "weight": 7.0 + }, + { + "source": "SCROOGE'S NIECE'S SISTERS", + "target": "SCROOGE'S NIECE", + "weight": 7.0 + }, + { + "source": "TOPPER", + "target": "SCROOGE'S NIECE'S SISTERS", + "weight": 6.0 + }, + { + "source": "FRED", + "target": "CHRISTMAS", + "weight": 8.0 + }, + { + "source": "SCROOGE'S FRIENDS", + "target": "FRED", + "weight": 7.0 + }, + { + "source": "THE LADIES", + "target": "SCROOGE'S NIECE", + "weight": 7.0 + }, + { + "source": "THE PLUMP SISTER", + "target": "TOPPER", + "weight": 8.0 + }, + { + "source": "THE SISTER WITH ROSES", + "target": "SCROOGE'S NIECE'S SISTERS", + "weight": 7.0 + }, + { + "source": "THE HOUSEKEEPERS", + "target": "FRED", + "weight": 5.0 + }, + { + "source": "THE CLERK", + "target": "SCROOGE", + "weight": 7.0 + }, + { + "source": "THE DINNER", + "target": "FRED", + "weight": 7.0 + }, + { + "source": "THE DESSERT", + "target": "THE DINNER", + "weight": 6.0 + }, + { + "source": 
"THE MUSIC", + "target": "FRED", + "weight": 6.0 + }, + { + "source": "THE TEA", + "target": "THE MUSIC", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "SCROOGE'S NIECE", + "weight": 7.0 + }, + { + "source": "TOPPER", + "target": "PLUMP SISTER", + "weight": 13.0 + }, + { + "source": "SCROOGE'S NEPHEW", + "target": "SCROOGE'S NIECE", + "weight": 7.0 + }, + { + "source": "LONDON", + "target": "SCROOGE'S NEPHEW", + "weight": 7.0 + }, + { + "source": "SCROOGE'S FAMILY", + "target": "GUESTS", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "SCROOGE'S FAMILY", + "weight": 8.0 + }, + { + "source": "SCROOGE'S NEPHEW", + "target": "GUESTS", + "weight": 8.0 + }, + { + "source": "SCROOGE'S NIECE", + "target": "BOARDING-SCHOOL", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "BOARDING-SCHOOL", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "SEXT\u00d3N", + "weight": 6.0 + }, + { + "source": "JACOB MARLEY", + "target": "SEXT\u00d3N", + "weight": 6.0 + }, + { + "source": "SCROOGE'S NIECE", + "target": "PIANO", + "weight": 7.0 + }, + { + "source": "GUESTS", + "target": "PIANO", + "weight": 6.0 + }, + { + "source": "TOPPER", + "target": "FIRE-IRONS", + "weight": 6.0 + }, + { + "source": "TOPPER", + "target": "CURTAINS", + "weight": 6.0 + }, + { + "source": "TOPPER", + "target": "CHAIRS", + "weight": 6.0 + }, + { + "source": "TOPPER", + "target": "RING", + "weight": 7.0 + }, + { + "source": "TOPPER", + "target": "CHAIN", + "weight": 7.0 + }, + { + "source": "SCROOGE'S NEPHEW", + "target": "GAME OF YES AND NO", + "weight": 8.0 + }, + { + "source": "GUESTS", + "target": "GAME OF YES AND NO", + "weight": 8.0 + }, + { + "source": "SCROOGE'S NIECE", + "target": "GAME OF HOW, WHEN, AND WHERE", + "weight": 7.0 + }, + { + "source": "GUESTS", + "target": "GAME OF BLIND MAN'S-BUFF", + "weight": 8.0 + }, + { + "source": "GUESTS", + "target": "GAME OF FORFEITS", + "weight": 8.0 + }, + { + "source": "WHITECHAPEL", + "target": "LONDON", + 
"weight": 7.0 + }, + { + "source": "SPIRIT", + "target": "SCROOGE", + "weight": 8.0 + }, + { + "source": "GAME OF YES AND NO", + "target": "CHRISTMAS", + "weight": 8.0 + }, + { + "source": "GAME OF BLIND MAN'S-BUFF", + "target": "CHRISTMAS", + "weight": 8.0 + }, + { + "source": "GAME OF FORFEITS", + "target": "CHRISTMAS", + "weight": 8.0 + }, + { + "source": "GAME OF HOW, WHEN, AND WHERE", + "target": "CHRISTMAS", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "PLUMP SISTER", + "weight": 5.0 + }, + { + "source": "GHOST", + "target": "BOY (IGNORANCE)", + "weight": 8.0 + }, + { + "source": "GHOST", + "target": "GIRL", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "LONDON", + "weight": 16.0 + }, + { + "source": "GHOST", + "target": "ALMSHOUSE", + "weight": 6.0 + }, + { + "source": "GHOST", + "target": "HOSPITAL", + "weight": 6.0 + }, + { + "source": "GHOST", + "target": "GAOL", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "CHRISTMAS HOLIDAYS", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "TWELFTH-NIGHT PARTY", + "weight": 5.0 + }, + { + "source": "GHOST", + "target": "CHRISTMAS HOLIDAYS", + "weight": 7.0 + }, + { + "source": "BOY (IGNORANCE)", + "target": "GIRL", + "weight": 1.0 + }, + { + "source": "SPIRIT OF CHRISTMAS PRESENT", + "target": "SCROOGE", + "weight": 9.0 + }, + { + "source": "COMPANY AT FRED'S PARTY", + "target": "FRED", + "weight": 8.0 + }, + { + "source": "COMPANY AT FRED'S PARTY", + "target": "SCROOGE", + "weight": 7.0 + }, + { + "source": "FOREIGN LANDS", + "target": "SPIRIT OF CHRISTMAS PRESENT", + "weight": 6.0 + }, + { + "source": "SICK-BEDS", + "target": "SPIRIT OF CHRISTMAS PRESENT", + "weight": 6.0 + }, + { + "source": "MIDNIGHT", + "target": "SPIRIT OF CHRISTMAS PRESENT", + "weight": 8.0 + }, + { + "source": "THREE-QUARTERS PAST ELEVEN", + "target": "MIDNIGHT", + "weight": 7.0 + }, + { + "source": "CHIMES", + "target": "MIDNIGHT", + "weight": 6.0 + }, + { + "source": "POVERTY", + 
"target": "SPIRIT OF CHRISTMAS PRESENT", + "weight": 6.0 + }, + { + "source": "MISERY'S REFUGE", + "target": "SPIRIT OF CHRISTMAS PRESENT", + "weight": 6.0 + }, + { + "source": "AUTHORITY", + "target": "MISERY'S REFUGE", + "weight": 5.0 + }, + { + "source": "MAN", + "target": "BOY (IGNORANCE)", + "weight": 8.0 + }, + { + "source": "MAN", + "target": "GIRL", + "weight": 1.0 + }, + { + "source": "SPIRIT", + "target": "IGNORANCE", + "weight": 8.0 + }, + { + "source": "SPIRIT", + "target": "WANT", + "weight": 8.0 + }, + { + "source": "IGNORANCE", + "target": "WANT", + "weight": 7.0 + }, + { + "source": "SPIRIT", + "target": "CITY", + "weight": 7.0 + }, + { + "source": "CITY", + "target": "CHANGE", + "weight": 6.0 + }, + { + "source": "PRISONS", + "target": "CITY", + "weight": 5.0 + }, + { + "source": "WORKHOUSES", + "target": "CITY", + "weight": 5.0 + }, + { + "source": "THE LAST OF THE SPIRITS", + "target": "GHOST OF CHRISTMAS YET TO COME", + "weight": 9.0 + }, + { + "source": "SCROOGE", + "target": "THE LAST OF THE SPIRITS", + "weight": 1.0 + }, + { + "source": "BUSINESS MEN", + "target": "MERCHANTS", + "weight": 7.0 + }, + { + "source": "BUSINESS MEN", + "target": "CITY HEART", + "weight": 7.0 + }, + { + "source": "BUSINESS MEN", + "target": "DEATH OF UNNAMED MAN", + "weight": 8.0 + }, + { + "source": "GREAT FAT MAN", + "target": "BUSINESS MEN", + "weight": 8.0 + }, + { + "source": "RED-FACED GENTLEMAN", + "target": "BUSINESS MEN", + "weight": 8.0 + }, + { + "source": "MAN WITH LARGE CHIN", + "target": "BUSINESS MEN", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "BUSINESS MEN", + "weight": 7.0 + }, + { + "source": "NIGHT", + "target": "THE LAST OF THE SPIRITS", + "weight": 7.0 + }, + { + "source": "DEATH OF UNNAMED MAN", + "target": "CITY", + "weight": 7.0 + }, + { + "source": "DEATH OF UNNAMED MAN", + "target": "SCROOGE", + "weight": 9.0 + }, + { + "source": "GHOST OF CHRISTMAS YET TO COME", + "target": "DEATH OF UNNAMED MAN", + "weight": 9.0 + }, + 
{ + "source": "CHANGE", + "target": "MERCHANTS", + "weight": 8.0 + }, + { + "source": "CITY HEART", + "target": "CHANGE", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "JACOB", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "PHANTOM", + "weight": 18.0 + }, + { + "source": "BUSINESSMEN", + "target": "SCROOGE", + "weight": 7.0 + }, + { + "source": "RED-FACED GENTLEMAN", + "target": "MAN WITH LARGE CHIN", + "weight": 6.0 + }, + { + "source": "BUSINESS COMPANY", + "target": "SCROOGE", + "weight": 5.0 + }, + { + "source": "TOWN", + "target": "DEN OF INFAMOUS RESORT", + "weight": 8.0 + }, + { + "source": "DEN OF INFAMOUS RESORT", + "target": "RAG-AND-BONE SHOP", + "weight": 9.0 + }, + { + "source": "RAG-AND-BONE SHOP", + "target": "GREY-HAIRED RASCAL", + "weight": 9.0 + }, + { + "source": "FUNERAL OF THE DECEASED", + "target": "BUSINESSMEN", + "weight": 7.0 + }, + { + "source": "CONVERSATION ABOUT DEATH", + "target": "BUSINESSMEN", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "FUNERAL OF THE DECEASED", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "CONVERSATION ABOUT DEATH", + "weight": 1.0 + }, + { + "source": "PORCH", + "target": "BUSINESS DISTRICT", + "weight": 8.0 + }, + { + "source": "BUSINESS DISTRICT", + "target": "TOWN", + "weight": 9.0 + }, + { + "source": "OBSCURE PART OF TOWN", + "target": "TOWN", + "weight": 9.0 + }, + { + "source": "ALLEYS AND ARCHWAYS", + "target": "OBSCURE PART OF TOWN", + "weight": 8.0 + }, + { + "source": "CHRISTMAS-TIME", + "target": "CONVERSATION ABOUT DEATH", + "weight": 7.0 + }, + { + "source": "DEATH OF OLD SCRATCH", + "target": "FUNERAL OF THE DECEASED", + "weight": 9.0 + }, + { + "source": "CHANGE OF LIFE", + "target": "SCROOGE", + "weight": 9.0 + }, + { + "source": "GROUP OF SPEAKERS AND LISTENERS", + "target": "BUSINESSMEN", + "weight": 8.0 + }, + { + "source": "GROUP OF SPEAKERS AND LISTENERS", + "target": "RED-FACED GENTLEMAN", + "weight": 8.0 + }, + { + "source": "GROUP 
OF SPEAKERS AND LISTENERS", + "target": "MAN WITH LARGE CHIN", + "weight": 1.0 + }, + { + "source": "OLD JOE", + "target": "SHOP", + "weight": 10.0 + }, + { + "source": "CHARWOMAN", + "target": "OLD JOE", + "weight": 8.0 + }, + { + "source": "MRS. DILBER", + "target": "OLD JOE", + "weight": 8.0 + }, + { + "source": "UNDERTAKER'S MAN", + "target": "OLD JOE", + "weight": 8.0 + }, + { + "source": "CHARWOMAN", + "target": "MRS. DILBER", + "weight": 7.0 + }, + { + "source": "CHARWOMAN", + "target": "UNDERTAKER'S MAN", + "weight": 7.0 + }, + { + "source": "MRS. DILBER", + "target": "UNDERTAKER'S MAN", + "weight": 7.0 + }, + { + "source": "CHARWOMAN", + "target": "DEAD MAN", + "weight": 8.0 + }, + { + "source": "MRS. DILBER", + "target": "DEAD MAN", + "weight": 8.0 + }, + { + "source": "UNDERTAKER'S MAN", + "target": "DEAD MAN", + "weight": 8.0 + }, + { + "source": "SELLING OF THE DEAD MAN'S POSSESSIONS", + "target": "CHARWOMAN", + "weight": 10.0 + }, + { + "source": "SELLING OF THE DEAD MAN'S POSSESSIONS", + "target": "MRS. 
DILBER", + "weight": 10.0 + }, + { + "source": "SELLING OF THE DEAD MAN'S POSSESSIONS", + "target": "UNDERTAKER'S MAN", + "weight": 10.0 + }, + { + "source": "SELLING OF THE DEAD MAN'S POSSESSIONS", + "target": "OLD JOE", + "weight": 10.0 + }, + { + "source": "SELLING OF THE DEAD MAN'S POSSESSIONS", + "target": "SHOP", + "weight": 10.0 + }, + { + "source": "SELLING OF THE DEAD MAN'S POSSESSIONS", + "target": "DEAD MAN", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "DEAD MAN", + "weight": 10.0 + }, + { + "source": "PHANTOM", + "target": "MEETING IN OLD JOE'S SHOP", + "weight": 9.0 + }, + { + "source": "SCROOGE", + "target": "MEETING IN OLD JOE'S SHOP", + "weight": 8.0 + }, + { + "source": "PARLOUR", + "target": "SHOP", + "weight": 10.0 + }, + { + "source": "MEETING IN OLD JOE'S SHOP", + "target": "PARLOUR", + "weight": 1.0 + }, + { + "source": "THE WOMAN", + "target": "THE DEAD MAN", + "weight": 7.0 + }, + { + "source": "JOE", + "target": "THE DEAD MAN", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "THE PHANTOM", + "weight": 9.0 + }, + { + "source": "SCROOGE", + "target": "THE DEAD MAN", + "weight": 8.0 + }, + { + "source": "THE DEAD MAN", + "target": "THE ROOM", + "weight": 9.0 + }, + { + "source": "DEATH", + "target": "THE DEAD MAN", + "weight": 9.0 + }, + { + "source": "SCROOGE", + "target": "DEATH", + "weight": 7.0 + }, + { + "source": "THE PHANTOM", + "target": "DEATH", + "weight": 1.0 + }, + { + "source": "THE FIRST WOMAN", + "target": "JOE", + "weight": 8.0 + }, + { + "source": "THE FIRST WOMAN", + "target": "THE BUNDLE", + "weight": 8.0 + }, + { + "source": "THE FIRST WOMAN", + "target": "THE SHIRT", + "weight": 8.0 + }, + { + "source": "THE FIRST WOMAN", + "target": "THE BLANKETS", + "weight": 8.0 + }, + { + "source": "THE FIRST WOMAN", + "target": "THE BED", + "weight": 8.0 + }, + { + "source": "THE FIRST WOMAN", + "target": "THE DEAD MAN", + "weight": 7.0 + }, + { + "source": "THE OLD MAN", + "target": "THE LAMP", + "weight": 
7.0 + }, + { + "source": "THE OLD MAN", + "target": "THE DEAD MAN", + "weight": 6.0 + }, + { + "source": "THE HOUSE", + "target": "THE DEAD MAN", + "weight": 9.0 + }, + { + "source": "THE BED", + "target": "THE DEAD MAN", + "weight": 9.0 + }, + { + "source": "THE SHEET", + "target": "THE DEAD MAN", + "weight": 8.0 + }, + { + "source": "THE BLANKETS", + "target": "THE DEAD MAN", + "weight": 8.0 + }, + { + "source": "THE SHIRT", + "target": "THE DEAD MAN", + "weight": 8.0 + }, + { + "source": "THE BUNDLE", + "target": "THE DEAD MAN", + "weight": 8.0 + }, + { + "source": "THE FLANNEL BAG", + "target": "JOE", + "weight": 7.0 + }, + { + "source": "THE LAMP", + "target": "THE ROOM", + "weight": 7.0 + }, + { + "source": "THE ROOM", + "target": "THE HOUSE", + "weight": 8.0 + }, + { + "source": "THE BED", + "target": "THE ROOM", + "weight": 8.0 + }, + { + "source": "THE SHEET", + "target": "THE BED", + "weight": 8.0 + }, + { + "source": "THE BLANKETS", + "target": "THE BED", + "weight": 8.0 + }, + { + "source": "THE SHIRT", + "target": "THE BED", + "weight": 1.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "THE GHOST", + "weight": 9.0 + }, + { + "source": "CAROLINE", + "target": "CAROLINE'S HUSBAND", + "weight": 10.0 + }, + { + "source": "CAROLINE", + "target": "THE CREDITOR", + "weight": 8.0 + }, + { + "source": "CAROLINE'S HUSBAND", + "target": "THE CREDITOR", + "weight": 8.0 + }, + { + "source": "CAROLINE", + "target": "THE EVENT OF THE CREDITOR'S DEATH", + "weight": 8.0 + }, + { + "source": "CAROLINE'S HUSBAND", + "target": "THE EVENT OF THE CREDITOR'S DEATH", + "weight": 8.0 + }, + { + "source": "BOB CRATCHIT", + "target": "CRATCHIT'S WIFE", + "weight": 10.0 + }, + { + "source": "CRATCHIT'S WIFE", + "target": "TINY TIM", + "weight": 10.0 + }, + { + "source": "PETER CRATCHIT", + "target": "TINY TIM", + "weight": 18.0 + }, + { + "source": "BOB CRATCHIT", + "target": "BOB CRATCHIT'S HOUSE", + "weight": 10.0 + }, + { + "source": "CRATCHIT'S WIFE", + "target": "BOB 
CRATCHIT'S HOUSE", + "weight": 10.0 + }, + { + "source": "PETER CRATCHIT", + "target": "BOB CRATCHIT'S HOUSE", + "weight": 10.0 + }, + { + "source": "TINY TIM", + "target": "BOB CRATCHIT'S HOUSE", + "weight": 10.0 + }, + { + "source": "THE EVENT OF TINY TIM'S DEATH", + "target": "BOB CRATCHIT", + "weight": 10.0 + }, + { + "source": "THE EVENT OF TINY TIM'S DEATH", + "target": "CRATCHIT'S WIFE", + "weight": 10.0 + }, + { + "source": "THE EVENT OF TINY TIM'S DEATH", + "target": "PETER CRATCHIT", + "weight": 10.0 + }, + { + "source": "THE EVENT OF TINY TIM'S DEATH", + "target": "TINY TIM", + "weight": 10.0 + }, + { + "source": "THE TOWN", + "target": "CAROLINE", + "weight": 5.0 + }, + { + "source": "THE TOWN", + "target": "CAROLINE'S HUSBAND", + "weight": 5.0 + }, + { + "source": "THE TOWN", + "target": "BOB CRATCHIT", + "weight": 5.0 + }, + { + "source": "THE TOWN", + "target": "CRATCHIT'S WIFE", + "weight": 5.0 + }, + { + "source": "THE TOWN", + "target": "PETER CRATCHIT", + "weight": 5.0 + }, + { + "source": "THE TOWN", + "target": "TINY TIM", + "weight": 5.0 + }, + { + "source": "THE TOWN", + "target": "THE CREDITOR", + "weight": 5.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "THE EVENT OF THE CREDITOR'S DEATH", + "weight": 7.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "THE EVENT OF TINY TIM'S DEATH", + "weight": 1.0 + }, + { + "source": "THE PHANTOM", + "target": "EBENEZER SCROOGE", + "weight": 18.0 + }, + { + "source": "THE SPIRIT", + "target": "EBENEZER SCROOGE", + "weight": 9.0 + }, + { + "source": "THE PHANTOM", + "target": "THE SPIRIT", + "weight": 10.0 + }, + { + "source": "THE MOTHER IN THE FIRST SCENE", + "target": "CAROLINE", + "weight": 10.0 + }, + { + "source": "THE CHILDREN IN THE FIRST SCENE", + "target": "CAROLINE", + "weight": 10.0 + }, + { + "source": "THE HALF-DRUNKEN WOMAN", + "target": "CAROLINE'S HUSBAND", + "weight": 7.0 + }, + { + "source": "THE DARK CHAMBER", + "target": "THE SCENE OF THE DEAD MAN", + "weight": 8.0 + }, 
+ { + "source": "THE ROOM BY DAYLIGHT", + "target": "THE DINNER BY THE FIRE", + "weight": 7.0 + }, + { + "source": "SEVERAL STREETS", + "target": "EBENEZER SCROOGE", + "weight": 6.0 + }, + { + "source": "THE LONG-EXPECTED KNOCK", + "target": "THE DINNER BY THE FIRE", + "weight": 8.0 + }, + { + "source": "THE SCENE OF THE DEAD MAN", + "target": "EBENEZER SCROOGE", + "weight": 8.0 + }, + { + "source": "THE SCENE OF THE DEAD MAN", + "target": "THE PHANTOM", + "weight": 8.0 + }, + { + "source": "THE SCENE OF THE DEAD MAN", + "target": "THE DARK CHAMBER", + "weight": 1.0 + }, + { + "source": "CRATCHIT FAMILY", + "target": "PETER CRATCHIT", + "weight": 19.0 + }, + { + "source": "BOB CRATCHIT", + "target": "MR. SCROOGE'S NEPHEW", + "weight": 7.0 + }, + { + "source": "MR. SCROOGE'S NEPHEW", + "target": "CRATCHIT FAMILY", + "weight": 6.0 + }, + { + "source": "MR. SCROOGE'S NEPHEW", + "target": "MR. SCROOGE", + "weight": 10.0 + }, + { + "source": "CHRISTMAS", + "target": "CRATCHIT FAMILY", + "weight": 8.0 + }, + { + "source": "SUNDAY", + "target": "BOB CRATCHIT", + "weight": 7.0 + }, + { + "source": "SUNDAY", + "target": "CRATCHIT FAMILY", + "weight": 1.0 + }, + { + "source": "CRATCHIT GIRLS", + "target": "MRS. CRATCHIT", + "weight": 8.0 + }, + { + "source": "CRATCHIT GIRLS", + "target": "BOB CRATCHIT", + "weight": 8.0 + }, + { + "source": "CRATCHIT CHILDREN", + "target": "BOB CRATCHIT", + "weight": 10.0 + }, + { + "source": "CRATCHIT CHILDREN", + "target": "MRS. 
CRATCHIT", + "weight": 10.0 + }, + { + "source": "CRATCHIT CHILDREN", + "target": "TINY TIM", + "weight": 10.0 + }, + { + "source": "ROBERT CRATCHIT", + "target": "BOB CRATCHIT", + "weight": 10.0 + }, + { + "source": "THE STREET", + "target": "BOB CRATCHIT", + "weight": 7.0 + }, + { + "source": "THE ROOM ABOVE", + "target": "BOB CRATCHIT", + "weight": 8.0 + }, + { + "source": "THE ROOM ABOVE", + "target": "TINY TIM", + "weight": 9.0 + }, + { + "source": "THE FIRE", + "target": "CRATCHIT FAMILY", + "weight": 8.0 + }, + { + "source": "THE TABLE", + "target": "MRS. CRATCHIT", + "weight": 7.0 + }, + { + "source": "THE TABLE", + "target": "CRATCHIT FAMILY", + "weight": 1.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "SCROOGE'S OFFICE", + "weight": 9.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "GHOST OF CHRISTMAS YET TO COME", + "weight": 18.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "CHURCHYARD", + "weight": 9.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "CHRISTMAS", + "weight": 9.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "CRATCHIT FAMILY", + "weight": 1.0 + }, + { + "source": "SPIRIT OF TINY TIM", + "target": "TINY TIM", + "weight": 8.0 + }, + { + "source": "THE GRAVE", + "target": "CHURCHYARD", + "weight": 9.0 + }, + { + "source": "THE HOUSE", + "target": "SCROOGE'S OFFICE", + "weight": 7.0 + }, + { + "source": "THE COURT", + "target": "SCROOGE'S OFFICE", + "weight": 7.0 + }, + { + "source": "THE BEDPOST", + "target": "THE ROOM", + "weight": 8.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "THE BEDPOST", + "weight": 8.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "THE ROOM", + "weight": 8.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "THE PAST", + "weight": 7.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "THE PRESENT", + "weight": 7.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "THE FUTURE", + "weight": 7.0 + }, + { + "source": "THE GHOST", + "target": "THE PHANTOM", + 
"weight": 1.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "GHOST OF CHRISTMAS PRESENT", + "weight": 8.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "SPIRITS", + "weight": 9.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "TINY TIM", + "weight": 18.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "THE BOY", + "weight": 7.0 + }, + { + "source": "THE BOY", + "target": "POULTERER", + "weight": 7.0 + }, + { + "source": "POULTERER", + "target": "BOB CRATCHIT", + "weight": 6.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "POULTERER", + "weight": 7.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "CHURCHES", + "weight": 5.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "CHRISTMAS DAY", + "weight": 9.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "NEW YEAR", + "weight": 5.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "SCROOGE'S HOUSE", + "weight": 8.0 + }, + { + "source": "SCROOGE'S HOUSE", + "target": "FIREPLACE", + "weight": 6.0 + }, + { + "source": "SCROOGE'S HOUSE", + "target": "WINDOW", + "weight": 6.0 + }, + { + "source": "SCROOGE'S HOUSE", + "target": "NEXT STREET", + "weight": 5.0 + }, + { + "source": "POULTERER", + "target": "NEXT STREET", + "weight": 6.0 + }, + { + "source": "POULTERER", + "target": "BOB CRATCHIT'S HOUSE", + "weight": 6.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "BOB CRATCHIT'S HOUSE", + "weight": 1.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "GHOST OF CHRISTMAS PAST", + "weight": 8.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "JOE MILLER", + "weight": 4.0 + }, + { + "source": "THE BOY", + "target": "SUNDAY", + "weight": 3.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "PRIZE TURKEY", + "weight": 8.0 + }, + { + "source": "PRIZE TURKEY", + "target": "BOB CRATCHIT", + "weight": 8.0 + }, + { + "source": "STREET-DOOR", + "target": "SCROOGE'S HOUSE", + "weight": 6.0 + }, + { + "source": "CORNER", + "target": "NEXT STREET", + "weight": 5.0 + }, 
+ { + "source": "POULTERER", + "target": "CORNER", + "weight": 6.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "HEAVEN", + "weight": 1.0 + }, + { + "source": "SCROOGE", + "target": "PORTLY GENTLEMAN", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "CAMDEN TOWN", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "CHURCH", + "weight": 5.0 + }, + { + "source": "SCROOGE", + "target": "SCROOGE'S NEPHEW (FRED)", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "FRED'S HOUSE", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "SCROOGE'S TRANSFORMATION", + "weight": 10.0 + }, + { + "source": "POULTERER'S MAN", + "target": "SCROOGE", + "weight": 5.0 + }, + { + "source": "TINY TIM", + "target": "BOB CRATCHIT", + "weight": 8.0 + }, + { + "source": "JOE MILLER", + "target": "SCROOGE", + "weight": 2.0 + }, + { + "source": "BOB CRATCHIT", + "target": "BOB'S FAMILY", + "weight": 9.0 + }, + { + "source": "BOB'S FAMILY", + "target": "TINY TIM", + "weight": 10.0 + }, + { + "source": "SCROOGE", + "target": "BOB'S FAMILY", + "weight": 8.0 + }, + { + "source": "SCROOGE", + "target": "SCROOGE'S NIECE BY MARRIAGE", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "STREET-DOOR", + "weight": 5.0 + }, + { + "source": "PORTLY GENTLEMAN", + "target": "COUNTING-HOUSE", + "weight": 8.0 + }, + { + "source": "POULTERER'S MAN", + "target": "CAB", + "weight": 7.0 + }, + { + "source": "CAB", + "target": "CAMDEN TOWN", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "DINING-ROOM", + "weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "KITCHENS OF HOUSES", + "weight": 4.0 + }, + { + "source": "SCROOGE", + "target": "WINDOWS", + "weight": 4.0 + }, + { + "source": "SCROOGE", + "target": "BEGGARS", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "CHILDREN", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "HOUSEKEEPERS", + "weight": 5.0 + }, + { + "source": "SCROOGE", + "target": "MISTRESS", + 
"weight": 7.0 + }, + { + "source": "SCROOGE", + "target": "GIRL", + "weight": 6.0 + }, + { + "source": "SCROOGE", + "target": "SCROOGE'S HAND", + "weight": 1.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "FRED", + "weight": 9.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "SCROOGE'S NIECE", + "weight": 7.0 + }, + { + "source": "FRED", + "target": "TOPPER", + "weight": 6.0 + }, + { + "source": "FRED", + "target": "PLUMP SISTER", + "weight": 6.0 + }, + { + "source": "EBENEZER SCROOGE", + "target": "SCROOGE'S TRANSFORMATION", + "weight": 10.0 + }, + { + "source": "SCROOGE'S TRANSFORMATION", + "target": "BOB CRATCHIT", + "weight": 9.0 + }, + { + "source": "SCROOGE'S TRANSFORMATION", + "target": "TINY TIM", + "weight": 9.0 + }, + { + "source": "CHRISTMAS DINNER AT FRED'S", + "target": "FRED", + "weight": 10.0 + }, + { + "source": "CHRISTMAS DINNER AT FRED'S", + "target": "EBENEZER SCROOGE", + "weight": 9.0 + }, + { + "source": "CHRISTMAS DINNER AT FRED'S", + "target": "SCROOGE'S NIECE", + "weight": 8.0 + }, + { + "source": "CHRISTMAS DINNER AT FRED'S", + "target": "TOPPER", + "weight": 7.0 + }, + { + "source": "CHRISTMAS DINNER AT FRED'S", + "target": "PLUMP SISTER", + "weight": 7.0 + }, + { + "source": "CHRISTMAS DAY", + "target": "EBENEZER SCROOGE", + "weight": 10.0 + }, + { + "source": "CHRISTMAS DAY", + "target": "BOB CRATCHIT", + "weight": 8.0 + }, + { + "source": "SCROOGE'S OFFICE", + "target": "EBENEZER SCROOGE", + "weight": 10.0 + }, + { + "source": "SCROOGE'S OFFICE", + "target": "BOB CRATCHIT", + "weight": 10.0 + }, + { + "source": "CITY", + "target": "EBENEZER SCROOGE", + "weight": 7.0 + }, + { + "source": "SPIRITS", + "target": "EBENEZER SCROOGE", + "weight": 10.0 + }, + { + "source": "BOB CRATCHIT", + "target": "BOB CRATCHIT'S FAMILY", + "weight": 10.0 + }, + { + "source": "TINY TIM", + "target": "BOB CRATCHIT'S FAMILY", + "weight": 10.0 + }, + { + "source": "CITY", + "target": "TOWN", + "weight": 6.0 + }, + { + "source": "CITY", + 
"target": "BOROUGH", + "weight": 6.0 + }, + { + "source": "GOOD OLD WORLD", + "target": "CITY", + "weight": 7.0 + }, + { + "source": "GOOD OLD WORLD", + "target": "TOWN", + "weight": 7.0 + }, + { + "source": "GOOD OLD WORLD", + "target": "BOROUGH", + "weight": 7.0 + }, + { + "source": "COURT", + "target": "SCROOGE'S OFFICE", + "weight": 5.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "FOUNDATION", + "weight": 9.0 + }, + { + "source": "FOUNDATION", + "target": "UNITED STATES", + "weight": 1.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "weight": 27.0 + }, + { + "source": "UNITED STATES", + "target": "PROJECT GUTENBERG", + "weight": 7.0 + }, + { + "source": "UNITED STATES", + "target": "A CHRISTMAS CAROL", + "weight": 6.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "UNITED STATES", + "weight": 15.0 + }, + { + "source": "TRANSCRIBER", + "target": "A CHRISTMAS CAROL", + "weight": 8.0 + }, + { + "source": "WWW.GUTENBERG.ORG", + "target": "PROJECT GUTENBERG", + "weight": 9.0 + }, + { + "source": "GENERAL TERMS OF USE", + "target": "FULL PROJECT GUTENBERG LICENSE", + "weight": 8.0 + }, + { + "source": "PROJECT GUTENBERG TRADEMARK LICENSE", + "target": "PROJECT GUTENBERG", + "weight": 9.0 + }, + { + "source": "PROJECT GUTENBERG EBOOK", + "target": "PROJECT GUTENBERG", + "weight": 10.0 + }, + { + "source": "PROJECT GUTENBERG EBOOK", + "target": "A CHRISTMAS CAROL", + "weight": 10.0 + }, + { + "source": "PROJECT GUTENBERG EBOOK", + "target": "FULL PROJECT GUTENBERG LICENSE", + "weight": 9.0 + }, + { + "source": "PROJECT GUTENBERG EBOOK", + "target": "PROJECT GUTENBERG TRADEMARK LICENSE", + "weight": 8.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "FULL PROJECT GUTENBERG LICENSE", + "weight": 9.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "GENERAL TERMS OF USE", + "weight": 8.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "PROJECT 
GUTENBERG TRADEMARK LICENSE", + "weight": 9.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "TRANSCRIBER", + "weight": 7.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "WWW.GUTENBERG.ORG", + "weight": 15.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "FULL PROJECT GUTENBERG LICENSE", + "weight": 9.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "PROJECT GUTENBERG TRADEMARK LICENSE", + "weight": 9.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "GENERAL TERMS OF USE", + "weight": 1.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "WWW.GUTENBERG.ORG", + "weight": 24.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "COPYRIGHT HOLDER", + "weight": 6.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "PROJECT GUTENBERG VOLUNTEERS AND EMPLOYEES", + "weight": 7.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "DISTRIBUTION OF PROJECT GUTENBERG ELECTRONIC WORKS", + "weight": 9.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "ROYALTY PAYMENT TO PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "weight": 9.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "PERMISSION FOR USE OF PROJECT GUTENBERG TRADEMARK", + "weight": 8.0 + }, + { + "source": "COPYRIGHT HOLDER", + "target": "PERMISSION FOR USE OF PROJECT GUTENBERG TRADEMARK", + "weight": 6.0 + }, + { + "source": "PROJECT GUTENBERG VOLUNTEERS AND EMPLOYEES", + "target": "DISTRIBUTION OF PROJECT GUTENBERG ELECTRONIC WORKS", + "weight": 1.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "PROJECT GUTENBERG LICENSE", + "weight": 9.0 + }, + { + "source": "PROJECT GUTENBERG LICENSE", + "target": "PROJECT GUTENBERG\u2122 TRADEMARK", + "weight": 8.0 + }, + { + "source": "PROJECT GUTENBERG LICENSE", + "target": "SECTION 4", + "weight": 7.0 + }, + { + "source": "PROJECT GUTENBERG 
LICENSE", + "target": "PARAGRAPH 1.E.1", + "weight": 8.0 + }, + { + "source": "PROJECT GUTENBERG LICENSE", + "target": "PARAGRAPH 1.E.7", + "weight": 8.0 + }, + { + "source": "PROJECT GUTENBERG LICENSE", + "target": "PARAGRAPH 1.E.8", + "weight": 8.0 + }, + { + "source": "PROJECT GUTENBERG LICENSE", + "target": "PARAGRAPH 1.E.9", + "weight": 8.0 + }, + { + "source": "PROJECT GUTENBERG LICENSE", + "target": "PARAGRAPH 1.F.3", + "weight": 8.0 + }, + { + "source": "PROJECT GUTENBERG LICENSE", + "target": "PLAIN VANILLA ASCII", + "weight": 7.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "COUNTRY", + "weight": 1.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "MISSISSIPPI", + "weight": 8.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "INTERNAL REVENUE SERVICE", + "weight": 15.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "SALT LAKE CITY", + "weight": 7.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "VOLUNTEERS", + "weight": 8.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "TRADEMARK OWNER", + "weight": 9.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "DISTRIBUTOR", + "weight": 6.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "VOLUNTEERS", + "weight": 1.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "STATE", + "weight": 6.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "AGENT", + "weight": 6.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "EMPLOYEE", + "weight": 7.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "EIN", + "weight": 8.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "BUSINESS OFFICE", + "weight": 16.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "EMPLOYEE", + 
"weight": 7.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "AGENT", + "weight": 1.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "STATE OF MISSISSIPPI", + "weight": 7.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "SALT LAKE CITY, UT", + "weight": 6.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "PROJECT GUTENBERG", + "weight": 9.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "PROFESSOR MICHAEL S. HART", + "weight": 9.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "EIN 64-6221541", + "weight": 9.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "CHARITIES", + "weight": 7.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "DONORS", + "weight": 8.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "VOLUNTEER SUPPORT", + "weight": 7.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "PG SEARCH FACILITY", + "weight": 8.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "EMAIL NEWSLETTER", + "weight": 7.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "WWW.GUTENBERG.ORG/CONTACT", + "weight": 7.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "WWW.GUTENBERG.ORG/DONATE", + "weight": 8.0 + }, + { + "source": "DONORS", + "target": "UNITED STATES", + "weight": 6.0 + }, + { + "source": "DONORS", + "target": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "weight": 8.0 + }, + { + "source": "DONORS", + "target": "CHARITIES", + "weight": 6.0 + }, + { + "source": "DONORS", + "target": "WWW.GUTENBERG.ORG/DONATE", + "weight": 7.0 + }, + { + "source": "DONORS", + "target": "WWW.GUTENBERG.ORG/CONTACT", + "weight": 6.0 + }, + { + "source": "PROJECT GUTENBERG LITERARY ARCHIVE FOUNDATION", + "target": "VOLUNTEER SUPPORT", + "weight": 7.0 + }, + { + "source": "PROJECT 
GUTENBERG", + "target": "WWW.GUTENBERG.ORG/CONTACT", + "weight": 7.0 + }, + { + "source": "PROJECT GUTENBERG", + "target": "WWW.GUTENBERG.ORG/DONATE", + "weight": 1.0 + } + ] +} \ No newline at end of file diff --git a/packages/graphrag/graphrag/index/utils/stable_lcc.py b/tests/unit/graphs/nx_stable_lcc.py similarity index 67% rename from packages/graphrag/graphrag/index/utils/stable_lcc.py rename to tests/unit/graphs/nx_stable_lcc.py index 806c0fd486..dfb16489ff 100644 --- a/packages/graphrag/graphrag/index/utils/stable_lcc.py +++ b/tests/unit/graphs/nx_stable_lcc.py @@ -1,20 +1,30 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module for producing a stable largest connected component, i.e. same input graph == same output lcc.""" +"""NetworkX-based stable LCC utility, kept for side-by-side test comparisons. + +This was originally at graphrag.index.utils.stable_lcc and has been moved here +because production code no longer uses it (superseded by the DataFrame-based +graphrag.graphs.stable_lcc). 
+""" import html from typing import Any, cast import networkx as nx -from graphrag.index.utils.graphs import largest_connected_component + +def _largest_connected_component(graph: nx.Graph) -> nx.Graph: + """Return the largest connected component of the graph (NX-based).""" + graph = graph.copy() + lcc_nodes = max(nx.connected_components(graph), key=len) + return graph.subgraph(lcc_nodes).copy() def stable_largest_connected_component(graph: nx.Graph) -> nx.Graph: """Return the largest connected component of the graph, with nodes and edges sorted in a stable way.""" graph = graph.copy() - graph = cast("nx.Graph", largest_connected_component(graph)) + graph = cast("nx.Graph", _largest_connected_component(graph)) graph = normalize_node_names(graph) return _stabilize_graph(graph) @@ -29,16 +39,6 @@ def _stabilize_graph(graph: nx.Graph) -> nx.Graph: fixed_graph.add_nodes_from(sorted_nodes) edges = list(graph.edges(data=True)) - # If the graph is undirected, we create the edges in a stable way, so we get the same results - # for example: - # A -> B - # in graph theory is the same as - # B -> A - # in an undirected graph - # however, this can lead to downstream issues because sometimes - # consumers read graph.nodes() which ends up being [A, B] and sometimes it's [B, A] - # but they base some of their logic on the order of the nodes, so the order ends up being important - # so we sort the nodes in the edge in a stable way, so that we always get the same order if not graph.is_directed(): def _sort_source_target(edge): diff --git a/tests/unit/graphs/test_compute_degree.py b/tests/unit/graphs/test_compute_degree.py new file mode 100644 index 0000000000..599ec25f20 --- /dev/null +++ b/tests/unit/graphs/test_compute_degree.py @@ -0,0 +1,130 @@ +# Copyright (c) 2024 Microsoft Corporation. 
+# Licensed under the MIT License + +"""Side-by-side tests comparing NetworkX compute_degree with DataFrame-based compute_degree_df.""" + +import json +from pathlib import Path + +import networkx as nx +import pandas as pd +from graphrag.graphs.compute_degree import compute_degree as compute_degree_df +from pandas.testing import assert_frame_equal + +FIXTURES_DIR = Path(__file__).parent / "fixtures" + + +def _make_relationships(*edges: tuple[str, str, float]) -> pd.DataFrame: + """Build a relationships DataFrame from (source, target, weight) tuples.""" + return pd.DataFrame([{"source": s, "target": t, "weight": w} for s, t, w in edges]) + + +def _normalize(df: pd.DataFrame) -> pd.DataFrame: + """Sort by title and reset index for comparison.""" + return df.sort_values("title").reset_index(drop=True) + + +def _compute_degree_via_nx(relationships: pd.DataFrame) -> pd.DataFrame: + """Compute degree using NetworkX directly.""" + graph = nx.from_pandas_edgelist( + relationships, source="source", target="target", edge_attr=["weight"] + ) + return pd.DataFrame([ + {"title": node, "degree": int(degree)} for node, degree in graph.degree + ]) + + +def test_simple_triangle(): + """Three nodes forming a triangle — each should have degree 2.""" + rels = _make_relationships( + ("A", "B", 1.0), + ("B", "C", 1.0), + ("A", "C", 1.0), + ) + nx_result = _normalize(_compute_degree_via_nx(rels)) + df_result = _normalize(compute_degree_df(rels)) + assert_frame_equal(nx_result, df_result) + + +def test_star_topology(): + """One hub connected to many leaves.""" + rels = _make_relationships( + ("hub", "a", 1.0), + ("hub", "b", 1.0), + ("hub", "c", 1.0), + ("hub", "d", 1.0), + ) + nx_result = _normalize(_compute_degree_via_nx(rels)) + df_result = _normalize(compute_degree_df(rels)) + assert_frame_equal(nx_result, df_result) + # hub should have degree 4 + hub_row = df_result[df_result["title"] == "hub"] + assert hub_row["degree"].iloc[0] == 4 + + +def test_disconnected_components(): + """Two 
separate components.""" + rels = _make_relationships( + ("A", "B", 1.0), + ("C", "D", 1.0), + ) + nx_result = _normalize(_compute_degree_via_nx(rels)) + df_result = _normalize(compute_degree_df(rels)) + assert_frame_equal(nx_result, df_result) + + +def test_single_edge(): + """Simplest case: one edge, two nodes, each with degree 1.""" + rels = _make_relationships(("X", "Y", 1.0)) + nx_result = _normalize(_compute_degree_via_nx(rels)) + df_result = _normalize(compute_degree_df(rels)) + assert_frame_equal(nx_result, df_result) + + +def test_self_loop(): + """A self-loop contributes degree 2 in NetworkX for undirected graphs.""" + rels = _make_relationships( + ("A", "A", 1.0), + ("A", "B", 1.0), + ) + nx_result = _normalize(_compute_degree_via_nx(rels)) + df_result = _normalize(compute_degree_df(rels)) + assert_frame_equal(nx_result, df_result) + + +def test_duplicate_edges(): + """Duplicate edges in the DataFrame — NetworkX deduplicates, so should we check behavior.""" + rels = _make_relationships( + ("A", "B", 1.0), + ("A", "B", 2.0), + ("B", "C", 1.0), + ) + nx_result = _normalize(_compute_degree_via_nx(rels)) + df_result = _normalize(compute_degree_df(rels)) + assert_frame_equal(nx_result, df_result) + + +def test_larger_graph(): + """A larger graph to exercise multiple degree values.""" + rels = _make_relationships( + ("A", "B", 1.0), + ("A", "C", 1.0), + ("A", "D", 1.0), + ("B", "C", 1.0), + ("D", "E", 1.0), + ("E", "F", 1.0), + ) + nx_result = _normalize(_compute_degree_via_nx(rels)) + df_result = _normalize(compute_degree_df(rels)) + assert_frame_equal(nx_result, df_result) + + +def test_fixture_graph(): + """Degree computation on the realistic A Christmas Carol fixture should match NetworkX.""" + with open(FIXTURES_DIR / "graph.json") as f: + data = json.load(f) + rels = pd.DataFrame(data["edges"]) + nx_result = _normalize(_compute_degree_via_nx(rels)) + df_result = _normalize(compute_degree_df(rels)) + assert_frame_equal(nx_result, df_result) + assert 
len(df_result) > 500 # sanity: realistic graph has 500+ nodes diff --git a/tests/unit/graphs/test_connected_components.py b/tests/unit/graphs/test_connected_components.py new file mode 100644 index 0000000000..5b44a01dbe --- /dev/null +++ b/tests/unit/graphs/test_connected_components.py @@ -0,0 +1,166 @@ +# Copyright (c) 2024 Microsoft Corporation. +# Licensed under the MIT License + +"""Side-by-side tests comparing NetworkX connected components with DataFrame-based implementation.""" + +import json +from pathlib import Path + +import networkx as nx +import pandas as pd +from graphrag.graphs.connected_components import ( + connected_components, + largest_connected_component, +) + +FIXTURES_DIR = Path(__file__).parent / "fixtures" + + +def _load_fixture() -> pd.DataFrame: + """Load the realistic graph fixture as a relationships DataFrame.""" + with open(FIXTURES_DIR / "graph.json") as f: + data = json.load(f) + return pd.DataFrame(data["edges"]) + + +def _make_relationships(*edges: tuple[str, str, float]) -> pd.DataFrame: + """Build a relationships DataFrame from (source, target, weight) tuples.""" + return pd.DataFrame([{"source": s, "target": t, "weight": w} for s, t, w in edges]) + + +# --------------------------------------------------------------------------- +# NetworkX reference helpers +# --------------------------------------------------------------------------- + + +def _nx_connected_components(relationships: pd.DataFrame) -> list[set[str]]: + """Compute connected components using NetworkX.""" + graph = nx.from_pandas_edgelist(relationships, source="source", target="target") + return sorted( + [set(c) for c in nx.connected_components(graph)], + key=len, + reverse=True, + ) + + +def _nx_largest_connected_component(relationships: pd.DataFrame) -> set[str]: + """Return the LCC using NetworkX.""" + components = _nx_connected_components(relationships) + return components[0] if components else set() + + +# 
--------------------------------------------------------------------------- +# Simple topology tests +# --------------------------------------------------------------------------- + + +def test_single_component(): + """Fully connected graph should have one component.""" + rels = _make_relationships( + ("A", "B", 1.0), + ("B", "C", 1.0), + ("A", "C", 1.0), + ) + nx_components = _nx_connected_components(rels) + df_components = connected_components(rels) + assert len(nx_components) == len(df_components) == 1 + assert nx_components[0] == df_components[0] + + +def test_two_components(): + """Two disconnected pairs should give two components.""" + rels = _make_relationships( + ("A", "B", 1.0), + ("C", "D", 1.0), + ) + nx_components = _nx_connected_components(rels) + df_components = connected_components(rels) + assert len(nx_components) == len(df_components) == 2 + assert {frozenset(c) for c in nx_components} == { + frozenset(c) for c in df_components + } + + +def test_three_components_lcc(): + """LCC should pick the largest of three components.""" + rels = _make_relationships( + ("A", "B", 1.0), + ("A", "C", 1.0), + ("A", "D", 1.0), + ("X", "Y", 1.0), + ("P", "Q", 1.0), + ) + nx_lcc = _nx_largest_connected_component(rels) + df_lcc = largest_connected_component(rels) + assert nx_lcc == df_lcc + assert df_lcc == {"A", "B", "C", "D"} + + +def test_star_topology(): + """Star should be a single component.""" + rels = _make_relationships( + ("hub", "a", 1.0), + ("hub", "b", 1.0), + ("hub", "c", 1.0), + ) + df_lcc = largest_connected_component(rels) + nx_lcc = _nx_largest_connected_component(rels) + assert df_lcc == nx_lcc == {"hub", "a", "b", "c"} + + +def test_duplicate_edges(): + """Duplicate edges should not affect component membership.""" + rels = _make_relationships( + ("A", "B", 1.0), + ("A", "B", 2.0), + ("C", "D", 1.0), + ) + nx_components = _nx_connected_components(rels) + df_components = connected_components(rels) + assert len(nx_components) == len(df_components) == 
2 + assert {frozenset(c) for c in nx_components} == { + frozenset(c) for c in df_components + } + + +def test_empty_relationships(): + """Empty edge list should produce no components.""" + rels = pd.DataFrame(columns=["source", "target", "weight"]) + assert connected_components(rels) == [] + assert largest_connected_component(rels) == set() + + +# --------------------------------------------------------------------------- +# Realistic fixture tests +# --------------------------------------------------------------------------- + + +def test_fixture_component_count(): + """Component count should match NetworkX on the realistic fixture.""" + rels = _load_fixture() + nx_components = _nx_connected_components(rels) + df_components = connected_components(rels) + assert len(df_components) == len(nx_components) + + +def test_fixture_component_sizes(): + """Component sizes (sorted desc) should match NetworkX.""" + rels = _load_fixture() + nx_sizes = [len(c) for c in _nx_connected_components(rels)] + df_sizes = [len(c) for c in connected_components(rels)] + assert df_sizes == nx_sizes + + +def test_fixture_lcc_membership(): + """LCC membership should be identical to NetworkX.""" + rels = _load_fixture() + nx_lcc = _nx_largest_connected_component(rels) + df_lcc = largest_connected_component(rels) + assert df_lcc == nx_lcc + + +def test_fixture_lcc_size(): + """LCC should contain 535 nodes (known from the fixture).""" + rels = _load_fixture() + lcc = largest_connected_component(rels) + assert len(lcc) == 535 diff --git a/tests/unit/graphs/test_modularity.py b/tests/unit/graphs/test_modularity.py new file mode 100644 index 0000000000..3ea1683e7a --- /dev/null +++ b/tests/unit/graphs/test_modularity.py @@ -0,0 +1,262 @@ +# Copyright (c) 2024 Microsoft Corporation. 
+# Licensed under the MIT License + +"""Side-by-side tests for the DataFrame-based modularity utility.""" + +import json +import math +from collections import defaultdict +from pathlib import Path +from typing import Any + +import networkx as nx +import pandas as pd +from graphrag.graphs.modularity import modularity + +FIXTURES_DIR = Path(__file__).parent / "fixtures" + + +# --------------------------------------------------------------------------- +# NX reference implementation (copied from graphrag.index.utils.graphs) +# --------------------------------------------------------------------------- + + +def _nx_modularity_component( + intra_community_degree: float, + total_community_degree: float, + network_degree_sum: float, + resolution: float, +) -> float: + community_degree_ratio = math.pow(total_community_degree, 2.0) / ( + 2.0 * network_degree_sum + ) + return (intra_community_degree - resolution * community_degree_ratio) / ( + 2.0 * network_degree_sum + ) + + +def _nx_modularity_components( + graph: nx.Graph, + partitions: dict[Any, int], + weight_attribute: str = "weight", + resolution: float = 1.0, +) -> dict[int, float]: + total_edge_weight = 0.0 + communities = set(partitions.values()) + + degree_sums_within_community: dict[int, float] = defaultdict(float) + degree_sums_for_community: dict[int, float] = defaultdict(float) + for vertex, neighbor_vertex, weight in graph.edges(data=weight_attribute): + vertex_community = partitions[vertex] + neighbor_community = partitions[neighbor_vertex] + if vertex_community == neighbor_community: + if vertex == neighbor_vertex: + degree_sums_within_community[vertex_community] += weight + else: + degree_sums_within_community[vertex_community] += weight * 2.0 + degree_sums_for_community[vertex_community] += weight + degree_sums_for_community[neighbor_community] += weight + total_edge_weight += weight + + return { + comm: _nx_modularity_component( + degree_sums_within_community[comm], + degree_sums_for_community[comm], + 
total_edge_weight, + resolution, + ) + for comm in communities + } + + +def nx_modularity( + graph: nx.Graph, + partitions: dict[Any, int], + weight_attribute: str = "weight", + resolution: float = 1.0, +) -> float: + """NX reference: compute modularity from a networkx graph.""" + components = _nx_modularity_components( + graph, partitions, weight_attribute, resolution + ) + return sum(components.values()) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _load_fixture() -> pd.DataFrame: + """Load the realistic graph fixture as a relationships DataFrame.""" + with open(FIXTURES_DIR / "graph.json") as f: + data = json.load(f) + return pd.DataFrame(data["edges"]) + + +def _make_edges(*edges: tuple[str, str, float]) -> pd.DataFrame: + """Build a relationships DataFrame from (source, target, weight) tuples.""" + return pd.DataFrame([{"source": s, "target": t, "weight": w} for s, t, w in edges]) + + +def _edges_to_nx(edges: pd.DataFrame) -> nx.Graph: + """Build an NX graph from an edges DataFrame.""" + return nx.from_pandas_edgelist(edges, edge_attr=["weight"]) + + +# --------------------------------------------------------------------------- +# Side-by-side tests +# --------------------------------------------------------------------------- + + +def test_two_clear_communities(): + """Two densely-connected communities with a weak bridge.""" + edges = _make_edges( + ("A", "B", 1.0), + ("B", "C", 1.0), + ("A", "C", 1.0), + ("D", "E", 1.0), + ("E", "F", 1.0), + ("D", "F", 1.0), + ("C", "D", 0.1), + ) + partitions = {"A": 0, "B": 0, "C": 0, "D": 1, "E": 1, "F": 1} + + nx_result = nx_modularity(_edges_to_nx(edges), partitions) + df_result = modularity(edges, partitions) + + assert abs(nx_result - df_result) < 1e-10 + assert df_result > 0 # good partition should be positive + + +def test_single_community(): + """All nodes in one community — 
modularity should be zero.""" + edges = _make_edges( + ("A", "B", 1.0), + ("B", "C", 1.0), + ("A", "C", 1.0), + ) + partitions = {"A": 0, "B": 0, "C": 0} + + nx_result = nx_modularity(_edges_to_nx(edges), partitions) + df_result = modularity(edges, partitions) + + assert abs(nx_result - df_result) < 1e-10 + assert abs(df_result) < 1e-10 + + +def test_every_node_own_community(): + """Each node in its own community — modularity should be negative.""" + edges = _make_edges( + ("A", "B", 1.0), + ("B", "C", 1.0), + ("A", "C", 1.0), + ) + partitions = {"A": 0, "B": 1, "C": 2} + + nx_result = nx_modularity(_edges_to_nx(edges), partitions) + df_result = modularity(edges, partitions) + + assert abs(nx_result - df_result) < 1e-10 + assert df_result < 0 + + +def test_reversed_edges(): + """Reversed edge direction should not affect modularity (undirected).""" + edges_fwd = _make_edges(("A", "B", 1.0), ("B", "C", 1.0), ("C", "D", 1.0)) + edges_rev = _make_edges(("B", "A", 1.0), ("C", "B", 1.0), ("D", "C", 1.0)) + partitions = {"A": 0, "B": 0, "C": 1, "D": 1} + + fwd = modularity(edges_fwd, partitions) + rev = modularity(edges_rev, partitions) + + assert abs(fwd - rev) < 1e-10 + + +def test_weighted_edges(): + """Different weights should affect modularity.""" + edges_uniform = _make_edges( + ("A", "B", 1.0), + ("B", "C", 1.0), + ("C", "D", 1.0), + ) + edges_weighted = _make_edges( + ("A", "B", 5.0), + ("B", "C", 0.1), + ("C", "D", 5.0), + ) + partitions = {"A": 0, "B": 0, "C": 1, "D": 1} + + u_nx = nx_modularity(_edges_to_nx(edges_uniform), partitions) + u_df = modularity(edges_uniform, partitions) + w_nx = nx_modularity(_edges_to_nx(edges_weighted), partitions) + w_df = modularity(edges_weighted, partitions) + + assert abs(u_nx - u_df) < 1e-10 + assert abs(w_nx - w_df) < 1e-10 + # weighted version should have higher modularity (strong intra, weak inter) + assert w_df > u_df + + +def test_custom_resolution(): + """Resolution parameter should affect result and match NX.""" + 
edges = _make_edges( + ("A", "B", 1.0), + ("B", "C", 1.0), + ("A", "C", 1.0), + ("D", "E", 1.0), + ("C", "D", 0.5), + ) + partitions = {"A": 0, "B": 0, "C": 0, "D": 1, "E": 1} + graph = _edges_to_nx(edges) + + for res in (0.5, 1.0, 2.0): + nx_r = nx_modularity(graph, partitions, resolution=res) + df_r = modularity(edges, partitions, resolution=res) + assert abs(nx_r - df_r) < 1e-10 + + +def test_duplicate_edges(): + """Duplicate edges (same pair, different weights) should match NX dedup.""" + edges = _make_edges( + ("A", "B", 1.0), + ("A", "B", 3.0), # duplicate — NX keeps last + ("B", "C", 2.0), + ) + partitions = {"A": 0, "B": 0, "C": 1} + + nx_result = nx_modularity(_edges_to_nx(edges), partitions) + df_result = modularity(edges, partitions) + + assert abs(nx_result - df_result) < 1e-10 + + +def test_reversed_duplicate_edges(): + """Edge (A,B) and (B,A) should be treated as the same, keeping last weight.""" + edges = _make_edges( + ("A", "B", 1.0), + ("B", "A", 5.0), # reversed duplicate — NX keeps 5.0 + ("B", "C", 2.0), + ) + partitions = {"A": 0, "B": 0, "C": 1} + + nx_result = nx_modularity(_edges_to_nx(edges), partitions) + df_result = modularity(edges, partitions) + + assert abs(nx_result - df_result) < 1e-10 + + +def test_fixture_matches_nx(): + """Modularity on the fixture graph should match NX for several partitions.""" + edges = _load_fixture() + graph = _edges_to_nx(edges) + nodes = sorted(graph.nodes()) + + # Test with a few different partition schemes + for n_communities in (2, 3, 5): + partitions = {node: i % n_communities for i, node in enumerate(nodes)} + nx_result = nx_modularity(graph, partitions) + df_result = modularity(edges, partitions) + assert abs(nx_result - df_result) < 1e-10, ( + f"Mismatch for {n_communities} communities: NX={nx_result}, DF={df_result}" + ) diff --git a/tests/unit/graphs/test_stable_lcc.py b/tests/unit/graphs/test_stable_lcc.py new file mode 100644 index 0000000000..835a402318 --- /dev/null +++ 
b/tests/unit/graphs/test_stable_lcc.py @@ -0,0 +1,213 @@ +# Copyright (c) 2024 Microsoft Corporation. +# Licensed under the MIT License + +"""Side-by-side tests for the DataFrame-based stable LCC utility.""" + +import json +from pathlib import Path + +import networkx as nx +import pandas as pd +from graphrag.graphs.stable_lcc import stable_lcc +from pandas.testing import assert_frame_equal + +from tests.unit.graphs.nx_stable_lcc import stable_largest_connected_component + +FIXTURES_DIR = Path(__file__).parent / "fixtures" + + +def _load_fixture() -> pd.DataFrame: + """Load the realistic graph fixture as a relationships DataFrame.""" + with open(FIXTURES_DIR / "graph.json") as f: + data = json.load(f) + return pd.DataFrame(data["edges"]) + + +def _make_relationships(*edges: tuple[str, str, float]) -> pd.DataFrame: + """Build a relationships DataFrame from (source, target, weight) tuples.""" + return pd.DataFrame([{"source": s, "target": t, "weight": w} for s, t, w in edges]) + + +def _nx_stable_lcc_node_set(relationships: pd.DataFrame) -> set[str]: + """Get the node set from the NX stable_largest_connected_component.""" + graph = nx.from_pandas_edgelist( + relationships, + source="source", + target="target", + edge_attr=["weight"], + ) + stable_graph = stable_largest_connected_component(graph) + return set(stable_graph.nodes()) + + +def _nx_stable_lcc_edge_set(relationships: pd.DataFrame) -> set[tuple[str, str]]: + """Get the edge set from the NX stable_largest_connected_component.""" + graph = nx.from_pandas_edgelist( + relationships, + source="source", + target="target", + edge_attr=["weight"], + ) + stable_graph = stable_largest_connected_component(graph) + return {(min(s, t), max(s, t)) for s, t in stable_graph.edges()} + + +# --------------------------------------------------------------------------- +# Stability tests +# --------------------------------------------------------------------------- + + +def test_flipped_edges_produce_same_result(): + """Same 
graph with edges in different direction should produce identical output.""" + rels_1 = _make_relationships( + ("A", "B", 1.0), + ("B", "C", 2.0), + ("C", "D", 3.0), + ("D", "E", 4.0), + ) + rels_2 = _make_relationships( + ("B", "A", 1.0), + ("C", "B", 2.0), + ("D", "C", 3.0), + ("E", "D", 4.0), + ) + result_1 = stable_lcc(rels_1) + result_2 = stable_lcc(rels_2) + assert_frame_equal(result_1, result_2) + + +def test_shuffled_rows_produce_same_result(): + """Different row order should produce identical output.""" + rels_1 = _make_relationships( + ("A", "B", 1.0), + ("B", "C", 2.0), + ("C", "D", 3.0), + ) + rels_2 = _make_relationships( + ("C", "D", 3.0), + ("A", "B", 1.0), + ("B", "C", 2.0), + ) + result_1 = stable_lcc(rels_1) + result_2 = stable_lcc(rels_2) + assert_frame_equal(result_1, result_2) + + +# --------------------------------------------------------------------------- +# Name normalization tests +# --------------------------------------------------------------------------- + + +def test_normalizes_node_names(): + """Node names should be uppercased, stripped, and HTML-unescaped.""" + rels = _make_relationships( + (" alice ", "bob", 1.0), + ("bob", "carol & dave", 1.0), + ) + result = stable_lcc(rels) + all_nodes = set(result["source"]).union(result["target"]) + assert "ALICE" in all_nodes + assert "BOB" in all_nodes + assert "CAROL & DAVE" in all_nodes + + +# --------------------------------------------------------------------------- +# LCC filtering tests +# --------------------------------------------------------------------------- + + +def test_filters_to_lcc(): + """Only the largest component should remain.""" + rels = _make_relationships( + ("A", "B", 1.0), + ("B", "C", 1.0), + ("C", "A", 1.0), + ("X", "Y", 1.0), + ) + result = stable_lcc(rels) + all_nodes = set(result["source"]).union(result["target"]) + assert all_nodes == {"A", "B", "C"} + + +def test_empty_relationships(): + """Empty input should return empty output.""" + rels = 
pd.DataFrame(columns=["source", "target", "weight"]) + result = stable_lcc(rels) + assert result.empty + + +# --------------------------------------------------------------------------- +# Side-by-side with NX implementation +# --------------------------------------------------------------------------- + + +def test_node_set_matches_nx(): + """LCC node set should match the NX stable_largest_connected_component.""" + rels = _make_relationships( + ("A", "B", 1.0), + ("B", "C", 1.0), + ("C", "D", 1.0), + ("D", "E", 1.0), + ("X", "Y", 1.0), + ) + nx_nodes = _nx_stable_lcc_node_set(rels) + df_result = stable_lcc(rels) + df_nodes = set(df_result["source"]).union(df_result["target"]) + assert df_nodes == nx_nodes + + +def test_edge_set_matches_nx(): + """LCC edge set should match the NX stable_largest_connected_component.""" + rels = _make_relationships( + ("A", "B", 1.0), + ("B", "C", 1.0), + ("C", "D", 1.0), + ("D", "E", 1.0), + ("X", "Y", 1.0), + ) + nx_edges = _nx_stable_lcc_edge_set(rels) + df_result = stable_lcc(rels) + df_edges = { + (min(s, t), max(s, t)) + for s, t in zip(df_result["source"], df_result["target"], strict=True) + } + assert df_edges == nx_edges + + +# --------------------------------------------------------------------------- +# Fixture tests +# --------------------------------------------------------------------------- + + +def test_fixture_node_set_matches_nx(): + """Fixture LCC nodes should match NX stable LCC.""" + rels = _load_fixture() + nx_nodes = _nx_stable_lcc_node_set(rels) + df_result = stable_lcc(rels) + df_nodes = set(df_result["source"]).union(df_result["target"]) + assert df_nodes == nx_nodes + + +def test_fixture_edge_set_matches_nx(): + """Fixture LCC edges should match NX stable LCC.""" + rels = _load_fixture() + nx_edges = _nx_stable_lcc_edge_set(rels) + df_result = stable_lcc(rels) + df_edges = { + (min(s, t), max(s, t)) + for s, t in zip(df_result["source"], df_result["target"], strict=True) + } + assert df_edges == nx_edges + 
+ +def test_fixture_edges_are_sorted(): + """Output edges should be sorted with source <= target and rows in order.""" + rels = _load_fixture() + result = stable_lcc(rels) + # Every source should be <= target + assert (result["source"] <= result["target"]).all() + # Rows should be sorted + is_sorted = ( + result[["source", "target"]].apply(tuple, axis=1).is_monotonic_increasing + ) + assert is_sorted diff --git a/tests/unit/indexing/graph/utils/test_stable_lcc.py b/tests/unit/indexing/graph/utils/test_stable_lcc.py index c4e17e54ee..905b93e559 100644 --- a/tests/unit/indexing/graph/utils/test_stable_lcc.py +++ b/tests/unit/indexing/graph/utils/test_stable_lcc.py @@ -3,7 +3,8 @@ import unittest import networkx as nx -from graphrag.index.utils.stable_lcc import stable_largest_connected_component + +from tests.unit.graphs.nx_stable_lcc import stable_largest_connected_component class TestStableLCC(unittest.TestCase):