Skip to content

Commit

Permalink
feat: lieux with duplicates from api are paginated (#190)
Browse files Browse the repository at this point in the history
  • Loading branch information
marc-gavanier authored Feb 14, 2024
1 parent d4f17e6 commit ef560f1
Show file tree
Hide file tree
Showing 11 changed files with 154 additions and 94 deletions.
1 change: 1 addition & 0 deletions src/common/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ export * from './data-inclusion';
export * from './http';
export * from './mediation-numerique';
export * from './output-file';
export * from './pagination/pagination';
export * from './publish-metadata';
20 changes: 20 additions & 0 deletions src/common/pagination/pagination.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import axios from 'axios';

/**
 * Link set carried by a paginated response.
 * `next` and `previous` are optional; the other three are always present.
 */
type PaginationLinks = {
  self: string;
  first: string;
  last: string;
  next?: string;
  previous?: string;
};

/**
 * One page of a paginated API response: the items of the page plus the
 * links used to reach other pages.
 */
export type Pagination<T> = {
  data: T[];
  links: PaginationLinks;
};

/**
 * Appends `query` to `url`, choosing the separator from what `url` already contains.
 * Leading `?` / `&` characters in `query` are dropped so no `&&` or `?&` is produced.
 */
const appendQuery = (url: string, query: string): string => {
  const params: string = query.replace(/^[?&]+/u, '');

  if (params === '') return url;
  if (url.endsWith('?') || url.endsWith('&')) return `${url}${params}`;

  return `${url}${url.includes('?') ? '&' : '?'}${params}`;
};

/**
 * Fetches every page of a paginated endpoint and returns all items in order.
 *
 * Starting from `url`, each response's `links.next` is followed until it is missing.
 * When `query` is non-empty it is appended to every requested URL (the first one and
 * each `next` link), with `?` when the URL has no query string yet and `&` otherwise.
 *
 * @param url - First page to request; when `null` or `undefined`, `data` is returned as is.
 * @param query - Extra query-string parameters (a leading `?` or `&` is ignored).
 * @param data - Items already collected; they come first in the result and are not mutated.
 * @returns The items of `data` followed by the items of every fetched page.
 * @throws Whatever `axios.get` rejects with; nothing is caught here.
 */
export const paginate = async <T>(url: string | undefined, query: string = '', data: T[] = []): Promise<T[]> => {
  if (url == null) return data;

  const items: T[] = [...data];
  let nextUrl: string | undefined = url;

  // Iterative on purpose: one accumulator is filled instead of re-copying it for every page.
  while (nextUrl != null) {
    const page: Pagination<T> = (await axios.get<Pagination<T>>(appendQuery(nextUrl, query))).data;

    for (const item of page.data) items.push(item);
    nextUrl = page.links.next;
  }

  return items;
};
59 changes: 35 additions & 24 deletions src/dedupliquer/cli/action/dedupliquer.action.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import axios, { AxiosResponse } from 'axios';
import { SchemaLieuMediationNumerique } from '@gouvfr-anct/lieux-de-mediation-numerique';

import { paginate } from '../../../common';
import { DeduplicationRepository } from '../../repositories';
import {
DuplicationComparison,
Expand All @@ -21,27 +20,39 @@ const onlyMoreThanDuplicationScoreThreshold =
(duplicationComparison: DuplicationComparison): boolean =>
duplicationComparison.score > (allowInternalMerge ? INTERNAL_DUPLICATION_SCORE_THRESHOLD : DUPLICATION_SCORE_THRESHOLD);

/**
 * CLI action that computes duplicate groups between two sets of lieux and saves the result.
 *
 * Steps visible in the body: build the repository from the options, load the lieux of
 * `baseSource` and `source`, score the pairs with `duplicationComparisons`, keep the
 * comparisons above the threshold, group them, merge each group and call `repository.save`.
 *
 * NOTE(review): this diff view appears to show the pre-change body first (plain `axios.get`
 * calls, down to the first `repository.save`) followed by the paginated replacement inside
 * the `try` block — confirm against the committed file.
 *
 * @param dedupliquerOptions - Options read here: `baseSource`, `source`, `allowInternal`;
 * the whole object is also passed to `deduplicationRepository`.
 * @returns A promise that resolves once saving is done. In the `try`/`catch` version any
 * error is only logged, so the promise also resolves on failure.
 */
/* eslint-disable-next-line max-statements */
/* eslint-disable-next-line max-statements, max-lines-per-function */
export const dedupliquerAction = async (dedupliquerOptions: DedupliquerOptions): Promise<void> => {
const repository: DeduplicationRepository = deduplicationRepository(dedupliquerOptions);

const allLieuxWithDuplicates: AxiosResponse<SchemaLieuMediationNumerique[]> = await axios.get(dedupliquerOptions.baseSource);

const lieuxToDeduplicate: AxiosResponse<SchemaLieuMediationNumerique[]> = await axios.get(dedupliquerOptions.source);

const duplicationComparisonsToGroup: DuplicationComparison[] = duplicationComparisons(
allLieuxWithDuplicates.data,
dedupliquerOptions.allowInternal,
lieuxToDeduplicate.data
).filter(onlyMoreThanDuplicationScoreThreshold(dedupliquerOptions.allowInternal));

const groups: Groups = groupDuplicates(duplicationComparisonsToGroup);
const merged: MergedLieuxByGroupMap = mergeDuplicates(new Date())(allLieuxWithDuplicates.data, groups);

/* eslint-disable-next-line no-console */
console.log('Nouveaux lieux concernés par une fusion :', groups.itemGroupMap.size);
/* eslint-disable-next-line no-console */
console.log('Nouveaux lieux fusionnés à enregistrer :', merged.size);

await repository.save(groups, merged, allLieuxWithDuplicates.data);
try {
const repository: DeduplicationRepository = deduplicationRepository(dedupliquerOptions);

// The path of `baseSource` is requested with explicit paging parameters; whatever followed
// its `?` is forwarded as the extra query that `paginate` appends to each request.
const allLieuxWithDuplicates: SchemaLieuMediationNumerique[] = await paginate<SchemaLieuMediationNumerique>(
`${dedupliquerOptions.baseSource.split('?')[0]}?page[number]=0&page[size]=10000`,
dedupliquerOptions.baseSource.split('?')[1]
);

// Same treatment for `source`.
const lieuxToDeduplicate: SchemaLieuMediationNumerique[] = await paginate<SchemaLieuMediationNumerique>(
`${dedupliquerOptions.source.split('?')[0]}?page[number]=0&page[size]=10000`,
dedupliquerOptions.source.split('?')[1]
);

// Keep only the comparisons whose score is above the threshold selected by `allowInternal`.
const duplicationComparisonsToGroup: DuplicationComparison[] = duplicationComparisons(
allLieuxWithDuplicates,
dedupliquerOptions.allowInternal,
lieuxToDeduplicate
).filter(onlyMoreThanDuplicationScoreThreshold(dedupliquerOptions.allowInternal));

const groups: Groups = groupDuplicates(duplicationComparisonsToGroup);

// The current date is handed to `mergeDuplicates` before it is applied to the lieux and groups.
const merged: MergedLieuxByGroupMap = mergeDuplicates(new Date())(allLieuxWithDuplicates, groups);

/* eslint-disable-next-line no-console */
console.log('Nouveaux lieux concernés par une fusion :', groups.itemGroupMap.size);
/* eslint-disable-next-line no-console */
console.log('Nouveaux lieux fusionnés à enregistrer :', merged.size);

await repository.save(groups, merged, allLieuxWithDuplicates);
} catch (error) {
// NOTE(review): the error is logged and not rethrown, so callers cannot detect a failure.
/* eslint-disable-next-line no-console */
console.log(error);
}
};
63 changes: 48 additions & 15 deletions src/dedupliquer/cli/data/save/save-with-api.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
/* eslint-disable max-lines-per-function, max-statements, no-await-in-loop */

import axios, { AxiosResponse } from 'axios';
import { authHeader, headers } from '../../../../common';
import { authHeader, headers, paginate } from '../../../../common';
import { MergeGroupTransfer } from '../../../data';
import { findGroupIdsToDelete, Groups, MergedLieuxByGroupMap, MergeGroup, mergeGroups } from '../../../steps';
import { DedupliquerOptions } from '../../dedupliquer-options';
Expand All @@ -17,18 +19,49 @@ export const saveWithApi =
async (groups: Groups, merged: MergedLieuxByGroupMap): Promise<void> => {
if (nothingToUpdate(groups, merged)) return;

const previousMergeGroup: MergeGroup[] = (
await axios.get<MergeGroup[]>(`${dedupliquerOptions.cartographieNationaleApiUrl}/lieux-inclusion-numerique/merge-groups`)
).data;

await axios.patch<unknown, AxiosResponse, MergeGroupTransfer>(
`${
dedupliquerOptions.cartographieNationaleApiUrl
}/lieux-inclusion-numerique/merge-groups?markAsDeduplicated=${shouldMarkAsDeduplicated(groups.mergeGroupsMap)}`,
{
mergeGroups: mergeGroups(groups, merged),
groupIdsToDelete: findGroupIdsToDelete(previousMergeGroup)(groups)
},
headers(authHeader(dedupliquerOptions.cartographieNationaleApiKey))
);
try {
const mergeGroupsToSave: MergeGroup[] = mergeGroups(groups, merged);
const mergeGroupsBatchSize: number = 1000;
const numberOfMergeGroupsToSaveBatches: number = Math.ceil(mergeGroupsToSave.length / mergeGroupsBatchSize);

for (let i: number = 0; i < numberOfMergeGroupsToSaveBatches; i++) {
await axios.patch<unknown, AxiosResponse, MergeGroupTransfer>(
`${dedupliquerOptions.cartographieNationaleApiUrl}/lieux-inclusion-numerique/merge-groups`,
{
mergeGroups: mergeGroupsToSave.slice(i * mergeGroupsBatchSize, (i + 1) * mergeGroupsBatchSize),
groupIdsToDelete: []
},
headers(authHeader(dedupliquerOptions.cartographieNationaleApiKey))
);
}

const groupsToDelete: string[] = findGroupIdsToDelete(
await paginate<MergeGroup>(
`${dedupliquerOptions.cartographieNationaleApiUrl}/lieux-inclusion-numerique/merge-groups?page[number]=0&page[size]=2000`
)
)(groups);

const groupsToDeleteBatchSize: number = 200;
const numberOfGroupsToDeleteBatches: number = Math.ceil(groupsToDelete.length / groupsToDeleteBatchSize);

for (let i: number = 0; i < numberOfGroupsToDeleteBatches; i++) {
await axios.patch<unknown, AxiosResponse, MergeGroupTransfer>(
`${dedupliquerOptions.cartographieNationaleApiUrl}/lieux-inclusion-numerique/merge-groups`,
{
mergeGroups: [],
groupIdsToDelete: groupsToDelete.slice(i * groupsToDeleteBatchSize, (i + 1) * groupsToDeleteBatchSize)
},
headers(authHeader(dedupliquerOptions.cartographieNationaleApiKey))
);
}

await axios.patch<unknown, AxiosResponse>(
`${dedupliquerOptions.cartographieNationaleApiUrl}/lieux-inclusion-numerique/mark-all-as-deduplicated`,
null,
headers(authHeader(dedupliquerOptions.cartographieNationaleApiKey))
);
} catch (error) {
/* eslint-disable-next-line no-console */
console.log(error);
}
};
33 changes: 0 additions & 33 deletions src/extract/cli/action/build-api-url/build-api-url.spec.ts

This file was deleted.

13 changes: 0 additions & 13 deletions src/extract/cli/action/build-api-url/build-api-url.ts

This file was deleted.

28 changes: 28 additions & 0 deletions src/extract/cli/action/build-api-url/extract-query-string.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import { extractQueryString } from './extract-query-string';

// Checks the query string produced by extractQueryString for each combination of options used below.
describe('build api url', (): void => {
  // Both options set: the mergedIds filter comes first, then the code insee filter.
  it('should build url with filter on code insee', (): void => {
    expect(
      extractQueryString({
        departements: '01,03,07,15,26,38,42,43,63,69,73,74',
        duplicates: true
      })
    ).toBe('and[mergedIds][exists]=false&adresse[beginsWith][code_insee]=01,03,07,15,26,38,42,43,63,69,73,74');
  });

  // No departements: only the mergedIds filter is expected.
  it('should build url without departements', (): void => {
    expect(extractQueryString({ duplicates: true })).toBe('and[mergedIds][exists]=false');
  });

  // duplicates is false and no departements: nothing to filter on.
  it('should build url without duplicates', (): void => {
    expect(extractQueryString({ duplicates: false })).toBe('');
  });
});
9 changes: 9 additions & 0 deletions src/extract/cli/action/build-api-url/extract-query-string.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { ExtractOptions } from '../../extract-options';

/**
 * Builds the filter part of a query string from the extract options.
 *
 * - `and[mergedIds][exists]=false` is included when `duplicates` is truthy.
 * - `adresse[beginsWith][code_insee]=<departements>` is included when `departements` is
 *   neither `null` nor `undefined`.
 *
 * Only the parts that apply are joined with `&`, so the result never starts with a
 * stray `&` (which happened when `departements` was set while `duplicates` was falsy).
 *
 * @returns The query string without a leading `?` or `&`; `''` when no filter applies.
 */
export const extractQueryString = ({
  departements,
  duplicates
}: Pick<ExtractOptions, 'departements' | 'duplicates'>): string =>
  [
    duplicates ? 'and[mergedIds][exists]=false' : '',
    departements == null ? '' : `adresse[beginsWith][code_insee]=${departements}`
  ]
    .filter((part: string): boolean => part !== '')
    .join('&');
2 changes: 1 addition & 1 deletion src/extract/cli/action/build-api-url/index.ts
Original file line number Diff line number Diff line change
@@ -1 +1 @@
export * from './build-api-url';
export * from './extract-query-string';
11 changes: 7 additions & 4 deletions src/extract/cli/action/extract.action.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
import axios from 'axios';
import {
fromSchemaLieuDeMediationNumerique,
LieuMediationNumerique,
SchemaLieuMediationNumerique
} from '@gouvfr-anct/lieux-de-mediation-numerique';
import { paginate } from '../../../common';
import { saveOutputsInFiles } from '../../../transformer/data';
import { ExtractOptions } from '../extract-options';
import { buildApiUrl } from './build-api-url';
import { extractQueryString } from './build-api-url';

/**
 * CLI action that loads lieux from the `with-duplicates` endpoint, converts them with
 * `fromSchemaLieuDeMediationNumerique` and writes them out through `saveOutputsInFiles`.
 *
 * NOTE(review): this diff view appears to show both the previous `axios.get(buildApiUrl(...))`
 * expression and its `paginate(...)` replacement inside the same parentheses — confirm
 * against the committed file.
 *
 * @param extractOptions - Options read here: `cartographieNationaleApiUrl` and `duplicates`;
 * the whole object is also passed to `extractQueryString` and `saveOutputsInFiles`.
 */
export const extractAction = async (extractOptions: ExtractOptions): Promise<void> => {
const lieuxToPublish: LieuMediationNumerique[] = (
await axios.get<SchemaLieuMediationNumerique[]>(buildApiUrl(extractOptions))
).data.map(fromSchemaLieuDeMediationNumerique);
// Paginated variant: the paging parameters are part of the URL and the filters built by
// `extractQueryString` are passed separately as the extra query.
await paginate<SchemaLieuMediationNumerique>(
`${extractOptions.cartographieNationaleApiUrl}/lieux-inclusion-numerique/with-duplicates?page[number]=0&page[size]=10000`,
extractQueryString(extractOptions)
)
).map(fromSchemaLieuDeMediationNumerique);

// The second argument is `undefined` when `duplicates` is truthy and 'sans-doublons' otherwise.
await saveOutputsInFiles(extractOptions)(lieuxToPublish, extractOptions.duplicates ? undefined : 'sans-doublons');
};
9 changes: 5 additions & 4 deletions src/transformer/cli/action/transformer.action.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ import {
LieuMediationNumerique,
SchemaLieuMediationNumerique
} from '@gouvfr-anct/lieux-de-mediation-numerique';
import axios from 'axios';
import { createHash } from 'crypto';
import { paginate } from '../../../common';
import {
saveOutputsInFiles,
sourceATransformer,
Expand Down Expand Up @@ -77,10 +77,11 @@ export const transformerAction = async (transformerOptions: TransformerOptions):
await updateSourceWithCartographieNationaleApi(transformerOptions)(sourceHash);

const lieuxToPublish: LieuMediationNumerique[] = (
await axios.get<SchemaLieuMediationNumerique[]>(
`${transformerOptions.cartographieNationaleApiUrl}/lieux-inclusion-numerique/with-duplicates?source[eq]=${transformerOptions.sourceName}&mergedIds[exists]=false`
await paginate<SchemaLieuMediationNumerique>(
`${transformerOptions.cartographieNationaleApiUrl}/lieux-inclusion-numerique/with-duplicates?page[number]=0&page[size]=10000`,
`source[eq]=${transformerOptions.sourceName}&mergedIds[exists]=false`
)
).data.map(fromSchemaLieuDeMediationNumerique);
).map(fromSchemaLieuDeMediationNumerique);

await saveOutputsInFiles(transformerOptions)(lieuxToPublish);
};

0 comments on commit ef560f1

Please sign in to comment.