Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(cogify): migrate argo-task work to basemaps-cogify BM-1127 #3393

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions packages/cogify/src/cogify/cli.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { subcommands } from 'cmd-ts';

import { TopoStacCreationCommand } from '../topo-raster/cli/cli.stac.js';
import { BasemapsCogifyCreateCommand } from './cli/cli.cog.js';
import { BasemapsCogifyConfigCommand } from './cli/cli.config.js';
import { BasemapsCogifyCoverCommand } from './cli/cli.cover.js';
Expand All @@ -12,5 +13,6 @@ export const CogifyCli = subcommands({
create: BasemapsCogifyCreateCommand,
config: BasemapsCogifyConfigCommand,
validate: BasemapsCogifyValidateCommand,
stac: TopoStacCreationCommand,
},
});
210 changes: 210 additions & 0 deletions packages/cogify/src/topo-raster/cli/cli.stac.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
import { loadTiffsFromPaths } from '@basemaps/config-loader/build/json/tiff.config.js';
import { Bounds, Epsg, Nztm2000Tms, TileMatrixSets } from '@basemaps/geo';
import { fsa, LogType } from '@basemaps/shared';
import { CliInfo } from '@basemaps/shared/build/cli/info.js';
import { boolean, command, flag, option, string } from 'cmd-ts';
import pLimit from 'p-limit';

import { isArgo } from '../../argo.js';
import { UrlFolder } from '../../cogify/parsers.js';
import { getLogger, logArguments } from '../../log.js';
import { groupTiffsByDirectory } from '../mappers/group-tiffs-by-directory.js';
import { mapEpsgToSlug } from '../mappers/map-epsg-to-slug.js';
import { createStacCollection } from '../stac/create-stac-collection.js';
import { createStacItems } from '../stac/create-stac-item-groups.js';
import { writeStacFiles } from '../stac/write-stac-files.js';
import { MapSheetStacItem } from '../types/map-sheet-stac-item.js';

// Concurrency limiter created with p-limit (limit of 10); it is handed to loadTiffsFromPaths when the source tiffs are loaded.
const Q = pLimit(10);
// Module-level lists of tiff paths, serialised to 'brokenTiffs.json' by the command handler.
// NOTE(review): nothing in this file appends to these arrays; presumably the imported mapper/stac
// modules record tiffs that are missing bounds, an epsg, or a size here — confirm against those modules.
export const brokenTiffs = { noBounds: [] as string[], noEpsg: [] as string[], noSize: [] as string[] };

/**
 * CLI command that lists the topographic map tiffs found under `--source`, builds STAC items and
 * collections for them, and, when running inside Argo or with `--force-output`, writes the JSON
 * work lists (`targets.json`, `tiles.json`, `brokenTiffs.json`) used by later workflow steps.
 *
 * @example
 * --source s3://linz-topographic-upload/topographic/TopoReleaseArchive/NZTopo50_GeoTif_Gridless/
 *
 * @throws Error('No Stac items created') when no EPSG directories or STAC item paths were returned.
 * NOTE(review): `loadTiffsToCreateStacs` only records paths when `--force-output` is set or the
 * process runs in Argo, so a plain local run currently always ends in this error — confirm intended.
 */
export const TopoStacCreationCommand = command({
  name: 'topo-stac-creation',
  version: CliInfo.version,
  description: 'List input topographic files, create StacItems, and generate tiles for grouping.',
  args: {
    ...logArguments,
    title: option({
      type: string,
      long: 'title',
      description: 'Imported imagery title',
    }),
    source: option({
      type: UrlFolder,
      long: 'source',
      description: 'Location of the source files',
    }),
    target: option({
      type: UrlFolder,
      long: 'target',
      description: 'Target location for the output files',
    }),
    scale: option({
      type: string,
      long: 'scale',
      description: 'topo25, topo50, or topo250',
    }),
    resolution: option({
      type: string,
      long: 'resolution',
      description: 'e.g. gridless_600dpi',
    }),
    latestOnly: flag({
      type: boolean,
      defaultValue: () => false,
      long: 'latest-only',
      description: 'Only process the latest version of each map sheet',
      defaultValueIsSerializable: true,
    }),
    forceOutput: flag({
      type: boolean,
      defaultValue: () => false,
      long: 'force-output',
      defaultValueIsSerializable: true,
    }),
  },
  async handler(args) {
    const logger = getLogger(this, args);
    const startTime = performance.now();
    logger.info('ListJobs:Start');

    const { epsgDirectoryPaths, stacItemPaths } = await loadTiffsToCreateStacs(
      args.latestOnly,
      args.source,
      args.target,
      args.title,
      args.scale,
      args.resolution,
      args.forceOutput,
      logger,
    );

    if (epsgDirectoryPaths.length === 0 || stacItemPaths.length === 0) throw new Error('No Stac items created');

    // write the work lists as JSON arrays
    if (args.forceOutput || isArgo()) {
      // `new URL()` needs an absolute URL when no base is given: a bare '/tmp/...' path throws
      // "Invalid URL", so the Argo scratch directory is spelled out with its file: scheme.
      const targetURL = isArgo() ? new URL('file:///tmp/topo-stac-creation/') : args.target;

      // for create-config: we need to tell create-config to create a bundled config for each epsg folder (latest only).
      // workflow: will loop 'targets.json' and create a node for each path where each node's job is to create a bundled config.
      await fsa.write(new URL('targets.json', targetURL), JSON.stringify(epsgDirectoryPaths, null, 2));

      // tiles.json makes the tiff files
      await fsa.write(new URL('tiles.json', targetURL), JSON.stringify(stacItemPaths, null, 2));
      await fsa.write(new URL('brokenTiffs.json', targetURL), JSON.stringify(brokenTiffs, null, 2));
    }

    logger.info({ duration: performance.now() - startTime }, 'ListJobs:Done');
  },
});

/**
 * Lists every file under `source`, loads them as tiffs, groups them by EPSG and map code, and
 * builds STAC items plus an "all" and a "latest" STAC collection for each EPSG.
 *
 * The grouping result is always written to `itemsByDirectory.json` under `target`. The STAC files
 * themselves are only written, and their paths only recorded, when `forceOutput` is set or the
 * process is running in Argo.
 *
 * @param latestOnly when true, the full ("all") STAC set is not written; only the latest set is
 * @param source directory URL from which the tiff files are listed
 * @param target directory URL beneath which `<scale>/<resolution>/<epsg>/` and
 *   `<scale>_latest/<resolution>/<epsg>/` are resolved
 * @param title title passed to both STAC collections
 * @example "New Zealand Topo50 Map Series (Gridless)"
 * @param scale used as a path segment and as the prefix of the collection slug
 * @param resolution used as a path segment
 * @param forceOutput write STAC files even when not running in Argo
 * @param logger optional logger for progress messages
 *
 * @returns `epsgDirectoryPaths`: one entry per EPSG pointing at its "latest" directory;
 *   `stacItemPaths`: the item paths reported by `writeStacFiles`. Both are empty when nothing was written.
 * @throws if an EPSG key cannot be parsed or mapped to a slug, or a map code has no latest item
 */
async function loadTiffsToCreateStacs(
  latestOnly: boolean,
  source: URL,
  target: URL,
  title: string,
  scale: string,
  resolution: string,
  forceOutput: boolean,
  logger?: LogType,
): Promise<{ epsgDirectoryPaths: { epsg: string; url: URL }[]; stacItemPaths: { path: URL }[] }> {
  logger?.info({ source }, 'LoadTiffs:Start');
  // extract all file paths from the source directory and convert them into URL objects
  const fileURLs = await fsa.toArray(fsa.list(source));
  // process all of the URL objects into Tiff objects
  const tiffs = await loadTiffsFromPaths(fileURLs, Q);
  logger?.info({ numTiffs: tiffs.length }, 'LoadTiffs:End');

  // group all of the Tiff objects by epsg and map code
  logger?.info('GroupTiffs:Start');
  const itemsByDir = groupTiffsByDirectory(tiffs, logger);
  const itemsByDirPath = new URL('itemsByDirectory.json', target);
  await fsa.write(itemsByDirPath, JSON.stringify(itemsByDir, null, 2));
  logger?.info('GroupTiffs:End');

  const epsgDirectoryPaths: { epsg: string; url: URL }[] = [];
  const stacItemPaths: { path: URL }[] = [];

  // create and write stac items and collections
  for (const [epsg, itemsByMapCode] of itemsByDir.all.entries()) {
    const allTargetURL = new URL(`${scale}/${resolution}/${epsg}/`, target);
    const latestTargetURL = new URL(`${scale}_latest/${resolution}/${epsg}/`, target);

    const allBounds: Bounds[] = [];
    const allStacItems: MapSheetStacItem[] = [];

    const latestBounds: Bounds[] = [];
    const latestStacItems: MapSheetStacItem[] = [];

    // parse epsg
    const epsgCode = Epsg.parse(epsg);
    if (epsgCode == null) throw new Error(`Failed to parse epsg '${epsg}'`);

    // convert epsg to tile matrix; unknown codes fall back to NZTM2000, so the result is never null
    const tileMatrix = TileMatrixSets.tryGet(epsgCode) ?? Nztm2000Tms; // TODO: support other tile matrices

    // create stac items
    logger?.info({ epsg }, 'CreateStacItems:Start');
    for (const [mapCode, items] of itemsByMapCode.entries()) {
      // get latest item, failing with context instead of a bare "undefined" dereference
      const latest = itemsByDir.latest.get(epsg)?.get(mapCode);
      if (latest == null) throw new Error(`Failed to find the latest item for map code '${mapCode}' in epsg '${epsg}'`);

      // create stac items
      const stacItems = createStacItems(allTargetURL, tileMatrix, items, latest, logger);

      allBounds.push(...items.map((item) => item.bounds));
      allStacItems.push(...stacItems.all);

      latestBounds.push(latest.bounds);
      latestStacItems.push(stacItems.latest);
    }

    // convert epsg to slug
    const epsgSlug = mapEpsgToSlug(epsgCode.code);
    if (epsgSlug == null) throw new Error(`Failed to map epsg code '${epsgCode.code}' to a slug`);

    const linzSlug = `${scale}-${epsgSlug}`;

    // create collections
    const collection = createStacCollection(title, linzSlug, Bounds.union(allBounds), allStacItems, logger);
    const latestCollection = createStacCollection(title, linzSlug, Bounds.union(latestBounds), latestStacItems, logger);
    logger?.info({ epsg }, 'CreateStacItems:End');

    if (forceOutput || isArgo()) {
      epsgDirectoryPaths.push({ epsg, url: latestTargetURL });

      // write stac items and collections
      logger?.info({ epsg }, 'WriteStacFiles:Start');
      if (!latestOnly) {
        const allPaths = await writeStacFiles(allTargetURL, allStacItems, collection, logger);
        stacItemPaths.push(...allPaths.itemPaths);
      }
      const latestPaths = await writeStacFiles(latestTargetURL, latestStacItems, latestCollection, logger);
      stacItemPaths.push(...latestPaths.itemPaths);
      logger?.info({ epsg }, 'WriteStacFiles:End');
    }
  }

  return { epsgDirectoryPaths, stacItemPaths };
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { strictEqual, throws } from 'node:assert';
import { describe, it } from 'node:test';

import { extractMapCodeAndVersion } from '../extract-map-code-and-version.js';

describe('extractMapCodeAndVersion', () => {
  const FakeDomain = 's3://topographic/fake-domain';

  // [file name, expected map code]; every parsable sample carries the version "v1-00"
  const parsable: [string, string][] = [
    ['MB07_GeoTifv1-00.tif', 'MB07'],
    ['MB07_GRIDLESS_GeoTifv1-00.tif', 'MB07'],
    ['MB07_TIFFv1-00.tif', 'MB07'],
    ['MB07_TIFF_600v1-00.tif', 'MB07'],
    ['AX32ptsAX31AY31AY32_GeoTifv1-00.tif', 'AX32ptsAX31AY31AY32'],
    ['AZ36ptsAZ35BA35BA36_GeoTifv1-00.tif', 'AZ36ptsAZ35BA35BA36'],
  ];

  // file names whose version part is malformed (missing "v" prefix / missing "-NN" suffix)
  const unparsable = ['MB07_GeoTif1-00.tif', 'MB07_TIFF_600v1.tif'];

  it('should parse the correct MapSheet Names', () => {
    parsable.forEach(([fileName, mapCode]) => {
      const parsed = extractMapCodeAndVersion(`${FakeDomain}/${fileName}`);
      strictEqual(parsed.mapCode, mapCode, 'Map code does not match');
      strictEqual(parsed.version, 'v1-00', 'Version does not match');
    });
  });

  it('should not able to parse a version from file', () => {
    unparsable.forEach((fileName) => {
      const attempt = (): unknown => extractMapCodeAndVersion(`${FakeDomain}/${fileName}`);
      throws(attempt, new Error('Version not found in the file name'));
    });
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import { Bounds } from '@basemaps/geo';
import { RasterTypeKey, Tiff, TiffTagGeo } from '@cogeotiff/core';

/**
 * Builds a Bounds from the bounding box of a Tiff's first image.
 *
 * @param tiff the Tiff whose first image supplies the bounding box.
 *
 * @returns the Bounds created from the first image's `bbox`. The declared type allows null,
 * but every failure path in this function throws instead of returning null.
 * @throws if the Tiff has no images, or its raster type geo key is 'Pixel is Point'.
 */
export function extractBoundsFromTiff(tiff: Tiff): Bounds | null {
  const firstImage = tiff.images[0];
  if (firstImage == null) throw new Error(`No images found in Tiff file: ${tiff.source.url.href}`);

  const rasterType = firstImage.valueGeo(TiffTagGeo.GTRasterTypeGeoKey);
  const isPixelPoint = rasterType === RasterTypeKey.PixelIsPoint;
  if (isPixelPoint) throw new Error("'Pixel is Point' raster grid spacing is not supported");

  return Bounds.fromBbox(firstImage.bbox);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { Epsg } from '@basemaps/geo';
import { LogType } from '@basemaps/shared';
import { Tiff, TiffTagGeo } from '@cogeotiff/core';

// Lookup from the leading text of a tiff's projected-citation geo key to an Epsg.
// extractEpsgFromTiff matches these keys with `startsWith`, in declaration order.
// NOTE(review): any 'Universal Transverse Mercator Zone ...' citation resolves to Epsg.Wgs84 regardless of zone — confirm that is intended.
const projections: Record<string, Epsg> = {
'Universal Transverse Mercator Zone': Epsg.Wgs84,
'Chatham Islands Transverse Mercator 2000': Epsg.Citm2000,
'New Zealand Transverse Mercator 2000': Epsg.Nztm2000,
};

/**
 * Resolves the Epsg of a Tiff from its first image.
 *
 * Two strategies are tried in order: the epsg code exposed by the image itself, then a prefix
 * match of the image's projected-citation geo key against the `projections` table.
 *
 * @param tiff the Tiff to inspect.
 * @param logger optional logger; receives one 'extractEpsgFromTiff()' entry describing the outcome.
 *
 * @returns the matching Epsg, or null when neither strategy finds one.
 * @throws if the Tiff has no images.
 */
export function extractEpsgFromTiff(tiff: Tiff, logger?: LogType): Epsg | null {
  const firstImage = tiff.images[0];
  if (firstImage == null) {
    throw new Error(`No images found in Tiff file: ${tiff.source.url.href}`);
  }

  // strategy 1: the epsg code carried by the image itself
  const rawCode = firstImage.epsg;
  const direct = rawCode == null ? null : Epsg.tryGet(rawCode);
  if (direct != null) {
    logger?.info({ found: true, method: 'direct' }, 'extractEpsgFromTiff()');
    return direct;
  }

  // strategy 2: match the projected citation text against the known projection names
  const citationTag = firstImage.valueGeo(TiffTagGeo.ProjectedCitationGeoKey);
  if (typeof citationTag === 'string') {
    const match = Object.entries(projections).find(([citation]) => citationTag.startsWith(citation));
    if (match != null) {
      logger?.info({ found: true, method: 'geotag' }, 'extractEpsgFromTiff()');
      return match[1];
    }
  }

  logger?.info({ found: false }, 'extractEpsgFromTiff()');
  return null;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { LogType } from '@basemaps/shared';
import path from 'path';

/**
 * Extracts a map code and version from the file name of the provided file URL.
 *
 * The map code is the text before the first underscore of the file name (extension removed).
 * The version is the first `v<digit>-<digit><digit>` sequence found in the file name.
 *
 * @param file the file URL (as a string) from which to extract a map code and version.
 * @param logger optional logger; receives the extracted values on success.
 *
 * @example
 * file: "s3://linz-topographic-upload/topographic/TopoReleaseArchive/NZTopo50_GeoTif_Gridless/CJ10_GRIDLESS_GeoTifv1-00.tif"
 * returns: { mapCode: "CJ10", version: "v1-00" }
 *
 * @returns an object containing the map code and version.
 * @throws if `file` is not a valid URL, the map code is empty, or no version is present.
 */
export function extractMapCodeAndVersion(file: string, logger?: LogType): { mapCode: string; version: string } {
  const url = new URL(file);
  // Only the path component names the file: parsing the whole href would let a query string or
  // fragment containing '/' or '_' be mistaken for part of the file name. URL paths always use
  // '/', hence the posix flavour of `path`.
  const fileName = path.posix.parse(url.pathname).name;

  // extract map code from head of the file name (e.g. CJ10)
  // `split` always yields a first element, so a missing map code shows up as an empty string
  const mapCode = fileName.split('_')[0];
  if (mapCode == null || mapCode === '') throw new Error('Map sheet not found in the file name');

  // extract version from the file name (e.g. v1-00)
  const version = fileName.match(/v(\d)-(\d\d)/)?.[0];
  if (version == null) throw new Error('Version not found in the file name');

  logger?.info({ mapCode, version }, 'extractMapCodeAndVersion()');
  return { mapCode, version };
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import { Size } from '@basemaps/geo';
import { LogType } from '@basemaps/shared';
import { Tiff } from '@cogeotiff/core';

/**
 * Reads the size of a Tiff's first image without ever throwing.
 *
 * @param tiff the Tiff to inspect.
 * @param logger optional logger; receives one 'extractSizeFromTiff()' entry with the outcome.
 *
 * @returns the first image's size, or null when there is no first image, it has no size,
 * or reading it throws.
 */
export function extractSizeFromTiff(tiff: Tiff, logger?: LogType): Size | null {
  try {
    const firstImage = tiff.images[0];
    const size = firstImage == null ? null : (firstImage.size ?? null);

    logger?.info({ found: size }, 'extractSizeFromTiff()');
    return size;
  } catch {
    // any failure while reading the image is reported as "no size" rather than propagated
    logger?.info({ found: false }, 'extractSizeFromTiff()');
    return null;
  }
}
Loading
Loading