Skip to content

Commit ea202a8

Browse files
committed
fix: without nested set
1 parent 033ae64 commit ea202a8

5 files changed

Lines changed: 150 additions & 55 deletions

File tree

database/migrations/create_geoname_table.php

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@
1010
{
1111
public function up()
1212
{
13+
// One row per parent -> child edge. The primary key has to be the composite of both
// columns: calling ->primary() on each column separately asks for two primary keys
// on the same table, and a key on parent_id alone would allow only one child per parent.
Schema::create('geonames_hierarchy', function (Blueprint $table) {
    $table->string('parent_id');
    $table->string('child_id');
    $table->primary(['parent_id', 'child_id']);
});
17+
1318
Schema::create('geonames', function (Blueprint $table) {
1419
$table->string('id')->primary();
1520
$table->string('parent_id')->index()->nullable();
@@ -41,8 +46,7 @@ public function up()
4146
}
4247
public function down()
4348
{
44-
Schema::table('table', function (Blueprint $table) {
45-
$table->drop();
46-
});
49+
Schema::dropIfExists('geonames_hierarchy');
50+
Schema::dropIfExists('geonames');
4751
}
4852
};

src/Actions/GetHierarchyAction.php

Lines changed: 55 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
namespace Parables\Geo\Actions;
66

7+
use Illuminate\Support\Facades\DB;
78
use Illuminate\Support\LazyCollection;
89
use Parables\Geo\Actions\Concerns\HasToastable;
910
use Parables\Geo\GeoName;
@@ -14,29 +15,48 @@ class GetHierarchyAction
1415

1516
/**
1617
* @param LazyCollection<int, LazyCollection> $contentsOfGeonameFiles
17-
* @return array
18+
* @return LazyCollection<int, array>
1819
*/
19-
public function execute(LazyCollection $contentsOfGeonameFiles): array
20+
public function execute(LazyCollection $contentsOfGeonameFiles): LazyCollection
2021
{
2122
ini_set('memory_limit', -1);
2223

2324
return $this->hierarchyForCitiesTowns(contentsOfGeonameFiles: $contentsOfGeonameFiles);
2425
}
2526

27+
/**
 * Lazily stream the parent/child pairs read from storage/geo/hierarchy.txt.
 *
 * Every line is split on tabs and trimmed; the first field is used as the parent id
 * and the second as the child id. Any further fields on the line are ignored.
 *
 * @return LazyCollection<int, array{parent_id: string, child_id: string}>
 */
public function hierarchy(): LazyCollection
{
    $fileContents = (new ReadFileAction)
        ->toastable($this->toastable)
        ->execute(storage_path('geo/hierarchy.txt'));

    // The `yield` has to live directly in the closure given to LazyCollection::make().
    // Yielding from inside an ->each() callback only turns that callback into a generator
    // that nobody iterates, leaving the outer closure a plain function that returns null,
    // so the resulting collection would never produce a single row.
    return LazyCollection::make(function () use ($fileContents) {
        foreach ($fileContents as $line) {
            $fields = array_map('trim', explode("\t", $line));
            $parentId = $fields[0];
            $childId = $fields[1] ?? '';

            // Skip blank or truncated lines rather than emitting a row with a missing id.
            if ($parentId === '' || $childId === '') {
                continue;
            }

            yield ['parent_id' => $parentId, 'child_id' => $childId];
        }
    });
}
42+
43+
/*
2644
public function hierarchy(): array
2745
{
2846
$result = [];
2947
30-
(new ReadFileAction)
48+
$fileContents = (new ReadFileAction)
3149
->toastable($this->toastable)
32-
->execute(storage_path('geo/hierarchy.txt'))
33-
->each(function (string $line) use (&$result) {
34-
[$parentId, $childId] = array_map('trim', explode("\t", $line));
35-
$result[$parentId][] = $childId;
36-
});
50+
->execute(storage_path('geo/hierarchy.txt'));
51+
52+
$fileContents->each(function (string $line) use (&$result) {
53+
[$parentId, $childId] = array_map('trim', explode("\t", $line));
54+
$result[$parentId][] = $childId;
55+
});
3756
3857
return $result;
3958
}
59+
*/
4060

4161
public function admins2Codes(): array
4262
{
@@ -57,40 +77,41 @@ public function admins2Codes(): array
5777
* @param LazyCollection<int, LazyCollection> $contentsOfGeonameFiles
5878
* @param array<int,mixed> $admin2Codes
5979
* @param array<int,mixed> $hierarchy
60-
* @return array<int,mixed>|array
80+
* @return LazyCollection<int, array>
6181
*/
62-
public function hierarchyForCitiesTowns(LazyCollection $contentsOfGeonameFiles, array $admin2Codes = [], array $hierarchy = []): array
63-
{
82+
public function hierarchyForCitiesTowns(
83+
LazyCollection $contentsOfGeonameFiles,
84+
): LazyCollection {
6485

6586
$this->toastable->toast("\n\n");
6687
$this->toastable->toast("Mapping cities and towns to the respective ADM2 division...");
6788

68-
if (empty($admin2Codes)) {
69-
$admin2Codes = $this->admins2Codes();
70-
}
71-
72-
if (empty($hierarchy)) {
73-
$hierarchy = $this->hierarchy();
74-
}
75-
76-
77-
$chunks = $contentsOfGeonameFiles->chunk(50);
78-
$chunks->each(function (LazyCollection $contentsOfGeonameFiles, int $index) use ($chunks, $admin2Codes, &$hierarchy) {
79-
$this->toastable->toast('Processing batch: ' . ($index + 1) . '/' . $chunks->count());
80-
81-
$contentsOfGeonameFiles->each(function (LazyCollection $fileContents) use ($admin2Codes, &$hierarchy) {
82-
$fileContents->each(function (string $line) use ($admin2Codes, &$hierarchy) {
83-
$geoname = GeoName::fromString(line: $line);
84-
if ($geoname->isCityTown()) {
85-
$key = $geoname->countryCode() . '.' . $geoname->admin1Code() . '.' . $geoname->admin2Code();
86-
$key = $admin2Codes[$key] ?? null;
87-
if ($key) {
88-
$hierarchy[$key][] = $geoname->id();
89+
$admin2Codes = $this->admins2Codes();
90+
91+
$collection = LazyCollection::make(function () use (
92+
$contentsOfGeonameFiles,
93+
$admin2Codes,
94+
) {
95+
$chunks = $contentsOfGeonameFiles->chunk(50);
96+
97+
$chunks->each(function (LazyCollection $contentsOfGeonameFiles, int $index) use ($chunks, $admin2Codes) {
98+
$this->toastable->toast('Processing batch: ' . ($index + 1) . '/' . $chunks->count());
99+
100+
$contentsOfGeonameFiles->each(function (LazyCollection $fileContents) use ($admin2Codes) {
101+
$fileContents->each(function (string $line) use ($admin2Codes) {
102+
$geoname = GeoName::fromString(line: $line);
103+
if ($geoname->isCityTown()) {
104+
$key = $geoname->countryCode() . '.' . $geoname->admin1Code() . '.' . $geoname->admin2Code();
105+
$parentId = $admin2Codes[$key] ?? null;
106+
if ($parentId) {
107+
yield ['parent_id' => $parentId, 'child_id' => $geoname->id()];
108+
}
89109
}
90-
}
110+
});
91111
});
92112
});
93113
});
94-
return $hierarchy;
114+
115+
return $this->hierarchy()->concat(source: $collection);
95116
}
96117
}

src/Actions/ReadFileAction.php

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ public function execute(string $fileName): LazyCollection
2121
return LazyCollection::empty();
2222
}
2323

24-
// $this->toastable->toast('Reading file: ' . $fileName);
25-
2624
$collection = LazyCollection::make(function () use ($fileName) {
2725
$fileStream = fopen($fileName, 'r');
2826
try {

src/Commands/GeoCommand.php

Lines changed: 85 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
use Illuminate\Console\Command;
88
use Illuminate\Support\Arr;
9+
use Illuminate\Support\Facades\DB;
910
use Illuminate\Support\Facades\Storage;
1011
use Illuminate\Support\LazyCollection;
1112
use Parables\Geo\Actions\BuildNestedSetModelAction;
@@ -57,7 +58,9 @@ public function handle(): int
5758

5859
$contentsOfGeonameFiles = $this->readGeonameFiles(fileNames: array_slice($fileNames, 2)); // skip admin2Codes.txt and hierarchy.txt
5960

60-
$nestedSet = $this->buildNestedSetModel(contentsOfGeonameFiles: $contentsOfGeonameFiles);
61+
$hierarchy = $this->buildHierarchy(contentsOfGeonameFiles: $contentsOfGeonameFiles);
62+
63+
$nestedSet = $this->buildNestedSetModel(hierarchy: $hierarchy);
6164

6265
$this->loadGeonames(contentsOfGeonameFiles: $contentsOfGeonameFiles, nestedSet: $nestedSet);
6366

@@ -180,7 +183,7 @@ public function askToSelectCountries(array $countries): array
180183
/**
 * Build the list of file names: the three fixed entries followed by one
 * "<code>.zip" entry for each element of $countryCodes, in order.
 *
 * @param array<int|string, string> $countryCodes
 * @return array<int, string>
 */
public function appendFileExtension(array $countryCodes): array
{
    $fileNames = ['admin2Codes.txt', 'hierarchy.zip', 'no-country.zip'];

    // The `+` array union keeps the left operand's value for every key both arrays share.
    // Both operands are lists keyed 0, 1, 2, ..., so the first three country archives were
    // silently dropped. array_merge() renumbers integer keys and appends instead;
    // array_values() guarantees list keys even if the caller passes a keyed array.
    return array_merge(
        $fileNames,
        array_values(array_map(fn ($code) => $code . '.zip', $countryCodes)),
    );
}
185188

186189
/**
@@ -232,17 +235,65 @@ public function readGeonameFiles(array $fileNames): LazyCollection
232235
return (new ReadFilesAction)->toastable($this)->execute($fileNames);
233236
}
234237

235-
/**
236-
* @param LazyCollection<int, LazyCollection> $contentsOfGeonameFiles
237-
*/
238-
public function buildNestedSetModel(LazyCollection $contentsOfGeonameFiles): array
238+
/** @param LazyCollection<int, LazyCollection> $contentsOfGeonameFiles */
239+
public function buildHierarchy(LazyCollection $contentsOfGeonameFiles): LazyCollection
239240
{
240241
$this->info('Getting hierarchy...');
241-
$hierarchy = (new GetHierarchyAction)
242-
->toastable($this)
243-
->execute(contentsOfGeonameFiles: $contentsOfGeonameFiles);
244-
$this->writeToFile(fileName: storage_path('geo/hierarchy.json'), content: $hierarchy);
245242

243+
$hierarchyCacheFile = storage_path('/geo/hierarchy.json');
244+
245+
$buildFromScratch = function (LazyCollection $contentsOfGeonameFiles)
246+
use ($hierarchyCacheFile) {
247+
$this->info('Building hierarchy from scratch...');
248+
$collection = (new GetHierarchyAction)
249+
->toastable($this)
250+
->execute(contentsOfGeonameFiles: $contentsOfGeonameFiles);
251+
252+
// TODO: Write this to the database
253+
254+
$this->toast("Inserting hierarchy into database");
255+
DB::table('geonames_hierarchy')->insertOrIgnore($collection->all());
256+
257+
$hierarchy = $collection->mapToGroups(function (array $item, int $key) {
258+
return [$item['parent_id'] => $item['child_id']];
259+
});
260+
261+
// NOTE: write the fresh update to the cache file
262+
$this->info("Writing the hierarchy to: $hierarchyCacheFile");
263+
$this->writeToFile(
264+
fileName: storage_path('geo/hierarchy.json'),
265+
content: $hierarchy->all(),
266+
);
267+
268+
return $hierarchy;
269+
};
270+
271+
if (file_exists($hierarchyCacheFile)) {
272+
$shouldRebuild = $this->confirm(
273+
question: "The $hierarchyCacheFile file already exists. Would you like to rebuild it from scratch?",
274+
default: false
275+
);
276+
277+
if ($shouldRebuild) {
278+
$hierarchy = $buildFromScratch($contentsOfGeonameFiles);
279+
} else {
280+
$this->info("Reading the hierarchy from: $hierarchyCacheFile");
281+
// read from cache
282+
$hierarchy = Arr::wrap(json_decode(
283+
file_get_contents($hierarchyCacheFile),
284+
associative: true,
285+
));
286+
}
287+
} else {
288+
$hierarchy = $buildFromScratch($contentsOfGeonameFiles);
289+
}
290+
291+
return $hierarchy;
292+
}
293+
294+
public function buildNestedSetModel(array $hierarchy): array
295+
{
296+
return [];
246297
$this->info('Building Nested Set Model...');
247298
$nestedSet = (new BuildNestedSetModelAction)
248299
->toastable($this)
@@ -276,10 +327,25 @@ public function loadGeonames(LazyCollection $contentsOfGeonameFiles, array $nest
276327
$stream = fopen(filename: $geonameFile, mode: 'w');
277328

278329
$chunks = $contentsOfGeonameFiles->chunk(50);
279-
$chunks->each(function (LazyCollection $contentsOfGeonameFiles, int $index) use ($stream, $nestedSet, $transformGeonamesAction, $loadGeonamesAction, $chunks, $progressBar) {
330+
$chunks->each(function (LazyCollection $contentsOfGeonameFiles, int $index)
331+
use (
332+
$stream,
333+
$nestedSet,
334+
$transformGeonamesAction,
335+
$loadGeonamesAction,
336+
$chunks,
337+
$progressBar,
338+
) {
280339
$this->info('Processing batch: ' . ($index + 1) . '/' . $chunks->count());
281340

282-
$contentsOfGeonameFiles->each(function (LazyCollection $fileContents) use ($stream, $nestedSet, $transformGeonamesAction, $loadGeonamesAction, $progressBar) {
341+
$contentsOfGeonameFiles->each(function (LazyCollection $fileContents)
342+
use (
343+
$stream,
344+
$nestedSet,
345+
$transformGeonamesAction,
346+
$loadGeonamesAction,
347+
$progressBar,
348+
) {
283349
$this->newLine(2);
284350

285351
$geonamesCollection = $transformGeonamesAction->execute(
@@ -289,7 +355,13 @@ public function loadGeonames(LazyCollection $contentsOfGeonameFiles, array $nest
289355
idAsindex: true
290356
);
291357

292-
fwrite(stream: $stream, data: json_encode($geonamesCollection, JSON_PRETTY_PRINT));
358+
fwrite(
359+
stream: $stream,
360+
data: json_encode(
361+
$geonamesCollection->all(),
362+
JSON_PRETTY_PRINT,
363+
),
364+
);
293365

294366
$loadGeonamesAction->execute(
295367
geonamesCollection: $geonamesCollection,

src/Models/GeoName.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ class GeoName extends Model
2727
*/
2828
protected $fillable = [
2929
'id',
30+
'parent_id',
31+
'lft',
32+
'rgt',
3033
'name',
3134
'ascii_name',
3235
'alternate_names',
@@ -47,8 +50,5 @@ class GeoName extends Model
4750
'modification_date',
4851
'created_at',
4952
'updated_at',
50-
'lft',
51-
'rgt',
52-
'parent_id',
5353
];
5454
}

0 commit comments

Comments
 (0)