Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: lieux with duplicates from api are paginated #190

Merged
merged 1 commit into from
Feb 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/common/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ export * from './data-inclusion';
export * from './http';
export * from './mediation-numerique';
export * from './output-file';
export * from './pagination/pagination';
export * from './publish-metadata';
20 changes: 20 additions & 0 deletions src/common/pagination/pagination.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import axios from 'axios';

/**
 * Navigation links attached to one page of a paginated response.
 * `next` and `previous` are optional; `paginate` stops when `next` is absent.
 */
type PaginationLinks = {
  self: string;
  first: string;
  last: string;
  next?: string;
  previous?: string;
};

/**
 * One page of a paginated response: the page items plus the links used to reach other pages.
 */
export type Pagination<T> = {
  data: T[];
  links: PaginationLinks;
};

/**
 * Fetches every page of a paginated resource, following `links.next` until it is absent,
 * and returns all items in page order.
 *
 * @param url - URL of the first page to fetch. When `null` or `undefined`, `data` is returned untouched.
 * @param query - Extra query string (no leading `?` or `&`) appended to every requested URL,
 * including each `links.next` URL. It is joined with `&` when the URL already has a query
 * string and with `?` otherwise.
 * @param data - Items already collected; they are placed before the fetched ones and are not mutated.
 * @returns The items of `data` followed by the items of each fetched page.
 */
export const paginate = async <T>(url: string | undefined, query: string = '', data: T[] = []): Promise<T[]> => {
  if (url == null) return data;

  const items: T[] = [...data];
  let nextUrl: string | undefined = url;

  while (nextUrl != null) {
    // A URL without any query string needs `?` rather than `&` before the extra query.
    const separator: string = nextUrl.includes('?') ? '&' : '?';
    const requestUrl: string = query === '' ? nextUrl : `${nextUrl}${separator}${query}`;

    // Pages must be fetched one after the other: each response carries the link to the next page.
    /* eslint-disable-next-line no-await-in-loop */
    const page: Pagination<T> = (await axios.get<Pagination<T>>(requestUrl)).data;

    // Append in place instead of re-copying the whole accumulator for every page.
    for (const item of page.data) items.push(item);

    nextUrl = page.links.next;
  }

  return items;
};
59 changes: 35 additions & 24 deletions src/dedupliquer/cli/action/dedupliquer.action.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import axios, { AxiosResponse } from 'axios';
import { SchemaLieuMediationNumerique } from '@gouvfr-anct/lieux-de-mediation-numerique';

import { paginate } from '../../../common';
import { DeduplicationRepository } from '../../repositories';
import {
DuplicationComparison,
Expand All @@ -21,27 +20,39 @@ const onlyMoreThanDuplicationScoreThreshold =
(duplicationComparison: DuplicationComparison): boolean =>
duplicationComparison.score > (allowInternalMerge ? INTERNAL_DUPLICATION_SCORE_THRESHOLD : DUPLICATION_SCORE_THRESHOLD);

/**
 * Runs the deduplication: loads the lieux from `baseSource` and from `source`, keeps the
 * duplication comparisons whose score is above the threshold, groups and merges the
 * duplicates, logs both counts, then saves the result through the deduplication repository.
 *
 * NOTE(review): this span looks like a rendered diff showing two versions of the body
 * together — first the one fetching with `axios.get`, then the one inside `try` fetching
 * with `paginate`. Confirm against the actual file before relying on the statement order.
 */
/* eslint-disable-next-line max-statements */
/* eslint-disable-next-line max-statements, max-lines-per-function */
export const dedupliquerAction = async (dedupliquerOptions: DedupliquerOptions): Promise<void> => {
const repository: DeduplicationRepository = deduplicationRepository(dedupliquerOptions);

// Single, non-paginated request for each source.
const allLieuxWithDuplicates: AxiosResponse<SchemaLieuMediationNumerique[]> = await axios.get(dedupliquerOptions.baseSource);

const lieuxToDeduplicate: AxiosResponse<SchemaLieuMediationNumerique[]> = await axios.get(dedupliquerOptions.source);

const duplicationComparisonsToGroup: DuplicationComparison[] = duplicationComparisons(
allLieuxWithDuplicates.data,
dedupliquerOptions.allowInternal,
lieuxToDeduplicate.data
).filter(onlyMoreThanDuplicationScoreThreshold(dedupliquerOptions.allowInternal));

const groups: Groups = groupDuplicates(duplicationComparisonsToGroup);
const merged: MergedLieuxByGroupMap = mergeDuplicates(new Date())(allLieuxWithDuplicates.data, groups);

/* eslint-disable-next-line no-console */
console.log('Nouveaux lieux concernés par une fusion :', groups.itemGroupMap.size);
/* eslint-disable-next-line no-console */
console.log('Nouveaux lieux fusionnés à enregistrer :', merged.size);

await repository.save(groups, merged, allLieuxWithDuplicates.data);
try {
const repository: DeduplicationRepository = deduplicationRepository(dedupliquerOptions);

// The source URL is split on '?': the part before it gets the paging parameters, the part
// after it (if any) is handed to `paginate` as the extra query string.
const allLieuxWithDuplicates: SchemaLieuMediationNumerique[] = await paginate<SchemaLieuMediationNumerique>(
`${dedupliquerOptions.baseSource.split('?')[0]}?page[number]=0&page[size]=10000`,
dedupliquerOptions.baseSource.split('?')[1]
);

const lieuxToDeduplicate: SchemaLieuMediationNumerique[] = await paginate<SchemaLieuMediationNumerique>(
`${dedupliquerOptions.source.split('?')[0]}?page[number]=0&page[size]=10000`,
dedupliquerOptions.source.split('?')[1]
);

const duplicationComparisonsToGroup: DuplicationComparison[] = duplicationComparisons(
allLieuxWithDuplicates,
dedupliquerOptions.allowInternal,
lieuxToDeduplicate
).filter(onlyMoreThanDuplicationScoreThreshold(dedupliquerOptions.allowInternal));

const groups: Groups = groupDuplicates(duplicationComparisonsToGroup);

// The merge is stamped with the current date.
const merged: MergedLieuxByGroupMap = mergeDuplicates(new Date())(allLieuxWithDuplicates, groups);

/* eslint-disable-next-line no-console */
console.log('Nouveaux lieux concernés par une fusion :', groups.itemGroupMap.size);
/* eslint-disable-next-line no-console */
console.log('Nouveaux lieux fusionnés à enregistrer :', merged.size);

await repository.save(groups, merged, allLieuxWithDuplicates);
} catch (error) {
// The error is only logged and not rethrown, so the returned promise still resolves.
/* eslint-disable-next-line no-console */
console.log(error);
}
};
63 changes: 48 additions & 15 deletions src/dedupliquer/cli/data/save/save-with-api.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
/* eslint-disable max-lines-per-function, max-statements, no-await-in-loop */

import axios, { AxiosResponse } from 'axios';
import { authHeader, headers } from '../../../../common';
import { authHeader, headers, paginate } from '../../../../common';
import { MergeGroupTransfer } from '../../../data';
import { findGroupIdsToDelete, Groups, MergedLieuxByGroupMap, MergeGroup, mergeGroups } from '../../../steps';
import { DedupliquerOptions } from '../../dedupliquer-options';
Expand All @@ -17,18 +19,49 @@ export const saveWithApi =
async (groups: Groups, merged: MergedLieuxByGroupMap): Promise<void> => {
if (nothingToUpdate(groups, merged)) return;

const previousMergeGroup: MergeGroup[] = (
await axios.get<MergeGroup[]>(`${dedupliquerOptions.cartographieNationaleApiUrl}/lieux-inclusion-numerique/merge-groups`)
).data;

await axios.patch<unknown, AxiosResponse, MergeGroupTransfer>(
`${
dedupliquerOptions.cartographieNationaleApiUrl
}/lieux-inclusion-numerique/merge-groups?markAsDeduplicated=${shouldMarkAsDeduplicated(groups.mergeGroupsMap)}`,
{
mergeGroups: mergeGroups(groups, merged),
groupIdsToDelete: findGroupIdsToDelete(previousMergeGroup)(groups)
},
headers(authHeader(dedupliquerOptions.cartographieNationaleApiKey))
);
try {
const mergeGroupsToSave: MergeGroup[] = mergeGroups(groups, merged);
const mergeGroupsBatchSize: number = 1000;
const numberOfMergeGroupsToSaveBatches: number = Math.ceil(mergeGroupsToSave.length / mergeGroupsBatchSize);

for (let i: number = 0; i < numberOfMergeGroupsToSaveBatches; i++) {
await axios.patch<unknown, AxiosResponse, MergeGroupTransfer>(
`${dedupliquerOptions.cartographieNationaleApiUrl}/lieux-inclusion-numerique/merge-groups`,
{
mergeGroups: mergeGroupsToSave.slice(i * mergeGroupsBatchSize, (i + 1) * mergeGroupsBatchSize),
groupIdsToDelete: []
},
headers(authHeader(dedupliquerOptions.cartographieNationaleApiKey))
);
}

const groupsToDelete: string[] = findGroupIdsToDelete(
await paginate<MergeGroup>(
`${dedupliquerOptions.cartographieNationaleApiUrl}/lieux-inclusion-numerique/merge-groups?page[number]=0&page[size]=2000`
)
)(groups);

const groupsToDeleteBatchSize: number = 200;
const numberOfGroupsToDeleteBatches: number = Math.ceil(groupsToDelete.length / groupsToDeleteBatchSize);

for (let i: number = 0; i < numberOfGroupsToDeleteBatches; i++) {
await axios.patch<unknown, AxiosResponse, MergeGroupTransfer>(
`${dedupliquerOptions.cartographieNationaleApiUrl}/lieux-inclusion-numerique/merge-groups`,
{
mergeGroups: [],
groupIdsToDelete: groupsToDelete.slice(i * groupsToDeleteBatchSize, (i + 1) * groupsToDeleteBatchSize)
},
headers(authHeader(dedupliquerOptions.cartographieNationaleApiKey))
);
}

await axios.patch<unknown, AxiosResponse>(
`${dedupliquerOptions.cartographieNationaleApiUrl}/lieux-inclusion-numerique/mark-all-as-deduplicated`,
null,
headers(authHeader(dedupliquerOptions.cartographieNationaleApiKey))
);
} catch (error) {
/* eslint-disable-next-line no-console */
console.log(error);
}
};
33 changes: 0 additions & 33 deletions src/extract/cli/action/build-api-url/build-api-url.spec.ts

This file was deleted.

13 changes: 0 additions & 13 deletions src/extract/cli/action/build-api-url/build-api-url.ts

This file was deleted.

28 changes: 28 additions & 0 deletions src/extract/cli/action/build-api-url/extract-query-string.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import { extractQueryString } from './extract-query-string';

// Specification of the query string derived from the extract options.
describe('build api url', (): void => {
  it('should build url with filter on code insee', (): void => {
    const departements: string = '01,03,07,15,26,38,42,43,63,69,73,74';

    const queryString: string = extractQueryString({ departements, duplicates: true });

    expect(queryString).toBe('and[mergedIds][exists]=false&adresse[beginsWith][code_insee]=01,03,07,15,26,38,42,43,63,69,73,74');
  });

  it('should build url without departements', (): void => {
    const queryString: string = extractQueryString({ duplicates: true });

    expect(queryString).toBe('and[mergedIds][exists]=false');
  });

  it('should build url without duplicates', (): void => {
    const queryString: string = extractQueryString({ duplicates: false });

    expect(queryString).toBe('');
  });
});
9 changes: 9 additions & 0 deletions src/extract/cli/action/build-api-url/extract-query-string.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { ExtractOptions } from '../../extract-options';

/**
 * Builds the filter query string (without leading `?` or `&`) for the extract options.
 *
 * - `duplicates` truthy adds `and[mergedIds][exists]=false`.
 * - `departements` not `null`/`undefined` adds `adresse[beginsWith][code_insee]=<departements>`.
 *
 * Only the filters that apply are joined with `&`, so the result never starts with a stray `&`
 * and is the empty string when no filter applies.
 *
 * @returns The query string to append to the API URL, possibly empty.
 */
export const extractQueryString = ({
  departements,
  duplicates
}: Pick<ExtractOptions, 'departements' | 'duplicates'>): string =>
  [
    ...(duplicates ? ['and[mergedIds][exists]=false'] : []),
    ...(departements == null ? [] : [`adresse[beginsWith][code_insee]=${departements}`])
  ].join('&');
2 changes: 1 addition & 1 deletion src/extract/cli/action/build-api-url/index.ts
Original file line number Diff line number Diff line change
@@ -1 +1 @@
export * from './build-api-url';
export * from './extract-query-string';
11 changes: 7 additions & 4 deletions src/extract/cli/action/extract.action.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
import axios from 'axios';
import {
fromSchemaLieuDeMediationNumerique,
LieuMediationNumerique,
SchemaLieuMediationNumerique
} from '@gouvfr-anct/lieux-de-mediation-numerique';
import { paginate } from '../../../common';
import { saveOutputsInFiles } from '../../../transformer/data';
import { ExtractOptions } from '../extract-options';
import { buildApiUrl } from './build-api-url';
import { extractQueryString } from './build-api-url';

/**
 * Fetches the lieux to publish, converts each one with `fromSchemaLieuDeMediationNumerique`
 * and saves them with `saveOutputsInFiles`.
 *
 * NOTE(review): this span looks like a rendered diff in which two versions of the fetch
 * expression are shown together (`axios.get(buildApiUrl(...))` and `paginate(...)`), so the
 * lines below do not read as a single statement — confirm against the actual file.
 */
export const extractAction = async (extractOptions: ExtractOptions): Promise<void> => {
const lieuxToPublish: LieuMediationNumerique[] = (
await axios.get<SchemaLieuMediationNumerique[]>(buildApiUrl(extractOptions))
).data.map(fromSchemaLieuDeMediationNumerique);
// Paged fetch of the `with-duplicates` endpoint; the filters from `extractQueryString` are
// passed as the extra query string.
await paginate<SchemaLieuMediationNumerique>(
`${extractOptions.cartographieNationaleApiUrl}/lieux-inclusion-numerique/with-duplicates?page[number]=0&page[size]=10000`,
extractQueryString(extractOptions)
)
).map(fromSchemaLieuDeMediationNumerique);

// When `duplicates` is falsy, 'sans-doublons' is passed as the second argument.
await saveOutputsInFiles(extractOptions)(lieuxToPublish, extractOptions.duplicates ? undefined : 'sans-doublons');
};
9 changes: 5 additions & 4 deletions src/transformer/cli/action/transformer.action.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ import {
LieuMediationNumerique,
SchemaLieuMediationNumerique
} from '@gouvfr-anct/lieux-de-mediation-numerique';
import axios from 'axios';
import { createHash } from 'crypto';
import { paginate } from '../../../common';
import {
saveOutputsInFiles,
sourceATransformer,
Expand Down Expand Up @@ -77,10 +77,11 @@ export const transformerAction = async (transformerOptions: TransformerOptions):
await updateSourceWithCartographieNationaleApi(transformerOptions)(sourceHash);

const lieuxToPublish: LieuMediationNumerique[] = (
await axios.get<SchemaLieuMediationNumerique[]>(
`${transformerOptions.cartographieNationaleApiUrl}/lieux-inclusion-numerique/with-duplicates?source[eq]=${transformerOptions.sourceName}&mergedIds[exists]=false`
await paginate<SchemaLieuMediationNumerique>(
`${transformerOptions.cartographieNationaleApiUrl}/lieux-inclusion-numerique/with-duplicates?page[number]=0&page[size]=10000`,
`source[eq]=${transformerOptions.sourceName}&mergedIds[exists]=false`
)
).data.map(fromSchemaLieuDeMediationNumerique);
).map(fromSchemaLieuDeMediationNumerique);

await saveOutputsInFiles(transformerOptions)(lieuxToPublish);
};
Loading