Skip to content

Commit

Permalink
CLDR-16101 change to using Google Sheets
Browse files Browse the repository at this point in the history
- read from sheets instead of parsing HTML
  • Loading branch information
srl295 committed Apr 26, 2024
1 parent 06f254a commit 5d3ccd1
Show file tree
Hide file tree
Showing 4 changed files with 214 additions and 41 deletions.
17 changes: 17 additions & 0 deletions tools/cldr-apps/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,23 @@
<artifactId>jjwt-gson</artifactId>
<scope>runtime</scope>
</dependency>

<!-- for sheets (subtype mapping) -->
<dependency>
<groupId>com.google.api-client</groupId>
<artifactId>google-api-client</artifactId>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>com.google.oauth-client</groupId>
<artifactId>google-oauth-client-jetty</artifactId>
<version>1.34.1</version>
</dependency>
<dependency>
<groupId>com.google.apis</groupId>
<artifactId>google-api-services-sheets</artifactId>
<version>v4-rev20220927-2.0.0</version>
</dependency>
</dependencies>
<build>
<finalName>${project.artifactId}</finalName>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
package org.unicode.cldr.web;

import com.google.api.client.auth.oauth2.Credential;
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
import com.google.api.client.http.javanet.NetHttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.gson.GsonFactory;
import com.google.api.services.sheets.v4.Sheets;
import com.google.api.services.sheets.v4.SheetsScopes;
import com.google.api.services.sheets.v4.model.UpdateValuesResponse;
import com.google.api.services.sheets.v4.model.ValueRange;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.GeneralSecurityException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.logging.Level;
import org.unicode.cldr.test.CheckCLDR;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRConfigImpl;
import org.unicode.cldr.util.CldrUtility;

/**
 * Google Sheets client for the Error Subtype map.
 *
 * <p>The client does two things against a single spreadsheet: it overwrites the {@link
 * #ALL_SUBTYPES_RANGE} range with the names of every {@code CheckCLDR.CheckStatus.Subtype}, and it
 * reads back the rows of {@link #SUBTYPE_MAP_RANGE}.
 *
 * <p>Configuration is read from {@link CLDRConfig} properties: {@code
 * CLDR_SUBTYPE_CREDENTIAL_PATH} (path of the service-key JSON file) and {@code
 * CLDR_SUBTYPE_SHEET} (the spreadsheet ID).
 */
public class ErrorSubtypeClient {

    static final java.util.logging.Logger logger =
            java.util.logging.Logger.getLogger(ErrorSubtypeClient.class.getSimpleName());

    /** range for the all-subtypes list. This will be written to */
    static final String ALL_SUBTYPES_RANGE = "AllSubtypes!A1:A";
    /** range for the subtype map. This will be read. Don't count the header. */
    static final String SUBTYPE_MAP_RANGE = "SubtypeToURLMap!A2:B";

    /** default name of credentials (service key) file */
    private static final String SUBTYPE_CREDENTIALS_JSON = "subtype-credentials.json";
    /** scopes needed */
    private static final List<String> SCOPES =
            Collections.singletonList(SheetsScopes.SPREADSHEETS);
    /** property indicating the full credential path */
    private static final String CLDR_SUBTYPE_CREDENTIAL_PATH = "CLDR_SUBTYPE_CREDENTIAL_PATH";
    /**
     * property indicating the Google Sheet ID of the spreadsheet. Spreadsheet must be shared with
     * the service credential for editing.
     */
    private static final String CLDR_SUBTYPE_SHEET = "CLDR_SUBTYPE_SHEET";

    /**
     * Read the service credentials from the configured file and refresh the access token.
     *
     * @return a credential scoped to {@link #SCOPES}
     * @throws IOException if the credentials file is unreadable, cannot be parsed, or the token
     *     refresh request fails
     */
    private static Credential getCredentials() throws IOException {
        CLDRConfig config = CLDRConfig.getInstance();
        final String credentialsPath =
                config.getProperty(CLDR_SUBTYPE_CREDENTIAL_PATH, getDefaultCredentialPath(config));
        if (!new File(credentialsPath).canRead()) {
            logger.severe("Could not read credentials file " + credentialsPath);
            throw new IOException("Could not read credentials file " + credentialsPath);
        }
        final Credential credential;
        // try-with-resources: the key file is only needed while the credential is being
        // parsed, so close the stream as soon as that is done instead of leaking the handle.
        try (InputStream in = new FileInputStream(credentialsPath)) {
            credential = GoogleCredential.fromStream(in).createScoped(SCOPES);
        }
        if (!credential.refreshToken()) {
            // Not fatal here: any subsequent Sheets request will surface the real failure.
            logger.warning("Could not refresh access token using " + credentialsPath);
        }
        return credential;
    }

    /**
     * Default path for credentials: the file {@link #SUBTYPE_CREDENTIALS_JSON} inside the
     * directory named by the {@code CldrUtility.HOME_KEY} property.
     */
    private static String getDefaultCredentialPath(CLDRConfig config) {
        return new File(config.getProperty(CldrUtility.HOME_KEY), SUBTYPE_CREDENTIALS_JSON)
                .getAbsolutePath();
    }

    /**
     * You can run this main to exercise the client. You will need to pass it the path to your
     * cldr.home which is the directory containing cldr.properties
     *
     * @param args exactly one argument: the CLDR home directory
     * @throws IllegalArgumentException if the argument count is not one
     */
    public static void main(String... args) throws IOException, GeneralSecurityException {
        logger.setLevel(Level.ALL);
        if (args.length != 1) {
            throw new IllegalArgumentException(
                    "Usage: " + ErrorSubtypeClient.class.getSimpleName() + " (CLDR_HOME)");
        }
        final String cldrHome = args[0];
        logger.info("Using cldrHome=" + cldrHome);
        System.setProperty(CLDRConfigImpl.class.getName() + ".cldrHome", cldrHome);
        CLDRConfigImpl.setCldrHome(cldrHome);

        // OK now call the client
        List<List<Object>> values = updateAndReadSubtypeMap();

        if (values == null || values.isEmpty()) {
            System.out.println("No data found.");
        } else {
            for (List<Object> row : values) {
                System.out.println(row.toString());
            }
        }
    }

    /**
     * Main internal API: first rewrites the all-subtypes list in the sheet, then reads the
     * subtype map.
     *
     * @return the rows read from {@link #SUBTYPE_MAP_RANGE}; empty (never {@code null}) when the
     *     range holds no data
     * @throws GeneralSecurityException if the trusted HTTP transport cannot be created
     * @throws IOException if credentials cannot be loaded or a Sheets request fails
     */
    static List<List<Object>> updateAndReadSubtypeMap()
            throws GeneralSecurityException, IOException {
        final String spreadsheetId = getSpreadsheetId();
        logger.info("Attempting to access spreadsheet " + spreadsheetId);
        final JsonFactory gson = GsonFactory.getDefaultInstance();
        final NetHttpTransport http = GoogleNetHttpTransport.newTrustedTransport();
        Sheets service =
                new Sheets.Builder(http, gson, getCredentials())
                        .setApplicationName("CLDR SurveyTool")
                        .build();

        // first update
        updateAllSubtypesSheet(spreadsheetId, service);

        // now read
        return readSubtypeSheet(spreadsheetId, service);
    }

    /**
     * Read values out of the subtype map.
     *
     * @return the rows of {@link #SUBTYPE_MAP_RANGE}, or an empty list when the response has no
     *     values. An empty list is returned rather than {@code null} so that callers may iterate
     *     the result without a null check.
     */
    private static List<List<Object>> readSubtypeSheet(final String spreadsheetId, Sheets service)
            throws IOException {
        logger.info(
                "Reading subtype map from spreadsheet "
                        + spreadsheetId
                        + " : "
                        + SUBTYPE_MAP_RANGE);
        ValueRange response =
                service.spreadsheets().values().get(spreadsheetId, SUBTYPE_MAP_RANGE).execute();
        List<List<Object>> values = response.getValues();
        if (values == null || values.isEmpty()) {
            logger.warning("Read empty subtype map");
            return Collections.emptyList();
        }
        logger.info("Read subtype with " + values.size() + " items");
        return values;
    }

    /**
     * Update the sheet at {@link #ALL_SUBTYPES_RANGE} with a single column: the heading {@code
     * subtype} followed by the name of every subtype constant.
     */
    private static void updateAllSubtypesSheet(final String spreadsheetId, Sheets service)
            throws IOException {
        // update the full set of items
        logger.info("Updating all subtypes list " + ALL_SUBTYPES_RANGE);
        final List<List<Object>> allSubtypes = new ArrayList<>();
        allSubtypes.add(Arrays.asList("subtype")); // heading
        for (final CheckCLDR.CheckStatus.Subtype s : CheckCLDR.CheckStatus.Subtype.values()) {
            allSubtypes.add(Arrays.asList(s.name()));
        }
        ValueRange allSets = new ValueRange().setValues(allSubtypes);
        UpdateValuesResponse uResponse =
                service.spreadsheets()
                        .values()
                        .update(spreadsheetId, ALL_SUBTYPES_RANGE, allSets)
                        .setValueInputOption("RAW")
                        .execute();
        // report the outcome of the update through the logger, like the rest of this class
        logger.info("Updated: " + uResponse.toPrettyString());
    }

    /**
     * Compute the ID of the spreadsheet: the {@code CLDR_SUBTYPE_SHEET} property if set,
     * otherwise the built-in default ID.
     */
    private static String getSpreadsheetId() {
        return CLDRConfig.getInstance()
                .getProperty(CLDR_SUBTYPE_SHEET, "1n7H_yt2Sxea1_AAp6Ggi5zo1HxtC5-xwieBlyM2kAcs");
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import org.json.JSONException;
import org.json.JSONObject;
import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
import org.unicode.cldr.util.CLDRURLS;

public class ErrorSubtypes {

Expand All @@ -19,7 +18,6 @@ public static void getJson(SurveyJSONWrapper r, HttpServletRequest request)
getRecheck(r, recheck);
return;
}
r.put("CLDR_SUBTYPE_URL", CLDRURLS.toHTML(SubtypeToURLMap.getDefaultUrl()));
r.put("COMMENT", SubtypeToURLMap.COMMENT);
r.put("BEGIN_MARKER", SubtypeToURLMap.BEGIN_MARKER);
r.put("END_MARKER", SubtypeToURLMap.END_MARKER);
Expand Down Expand Up @@ -79,11 +77,11 @@ private static void getRecheck(SurveyJSONWrapper r, String recheck)
if (recheck.startsWith("MAP")) {
try {
// load directly to make sure there are no errors
SubtypeToURLMap map = SubtypeToURLMap.makeDefaultInstance();
SubtypeToURLMap map = SubtypeToURLMap.reload();
if (map == null) {
r.put("err", "FAILED. Check for errors.");
} else {
SubtypeToURLMap.setDefaultInstance(map);
// SubtypeToURLMap.setDefaultInstance(map);
r.put("status", "SUCCESS!");
}
} catch (Throwable t) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.security.GeneralSecurityException;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
Expand All @@ -35,7 +36,6 @@
import org.jsoup.nodes.Document;
import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
import org.unicode.cldr.util.CLDRCacheDir;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRTool;

@CLDRTool(
Expand All @@ -46,29 +46,22 @@ public class SubtypeToURLMap {
/**
* Little tool for validating input data.
*
* @param args list of files to validate, if empty runs against default data.
* @throws IOException
* @throws FileNotFoundException
* @throws GeneralSecurityException
*/
public static void main(String args[]) throws FileNotFoundException, IOException {
if (args.length == 0) {
System.err.println(
"Usage: SubtypeToURLMap (url or file path). The default map is "
+ getDefaultUrl());
return;
} else {
int problems = 0;
for (final String fn : args) {
System.out.println("data: " + fn);
SubtypeToURLMap map = getInstance(new File(fn));
problems += map.dump();
}
if (problems > 0) {
throw new IllegalArgumentException(
MessageFormat.format(
"Total problem(s) found: {0} in {1} items(s)",
problems, args.length));
}
public static void main(String... args)
        throws FileNotFoundException, IOException, GeneralSecurityException {
    // Delegate to the sheets client first so the dynamic (spreadsheet) part is picked up.
    ErrorSubtypeClient.main(args);

    // Then validate the default instance and count what dump() reports.
    final SubtypeToURLMap defaultMap = getInstance();
    int problemCount = 0;
    problemCount += defaultMap.dump();
    if (problemCount <= 0) {
        return; // nothing to report
    }
    final String summary =
            MessageFormat.format(
                    "Total problem(s) found: {0} in {1} items(s)", problemCount, 1);
    throw new IllegalArgumentException(summary);
}

Expand Down Expand Up @@ -411,8 +404,6 @@ private static SubtypeToURLMap getInstance(URL resource, Document doc) throws IO
}
}

static final String DEFAULT_URL = "https://cldr.unicode.org/development/subtypes";

private static String CACHE_SUBTYPE_FILE = "urlmap-cache.txt";

private static final class SubtypeToURLMapHelper {
Expand Down Expand Up @@ -448,16 +439,13 @@ static SubtypeToURLMap make() {
}
}
try {
map = makeDefaultInstance();
logger.info("Read new map from " + getDefaultUrl());
map = new SubtypeToURLMap();
map.readFromSheets();
logger.info("Read new map");
// now, write out the cache
writeToCache(map);
} catch (IllegalArgumentException | IOException | URISyntaxException e) {
logger.warning(
"Could not initialize SubtypeToURLMap: "
+ e
+ " for URL "
+ getDefaultUrl());
logger.warning("Could not initialize SubtypeToURLMap: ");
e.printStackTrace();
// If we loaded the cache file, we will still use it.
if (map == null) {
Expand Down Expand Up @@ -487,14 +475,20 @@ private static void writeToCache(SubtypeToURLMap map) {
}
}

/**
* Fetch the URL used for the default map
*
* @return
*/
public static String getDefaultUrl() {
return CLDRConfig.getInstance().getProperty("CLDR_SUBTYPE_URL", DEFAULT_URL);
/**
 * Populate this map from the rows returned by {@code
 * ErrorSubtypeClient.updateAndReadSubtypeMap()}.
 *
 * <p>Each usable row has exactly two cells: a subtype name and a URL. Rows with a different
 * cell count, or with a missing or blank cell, are skipped. A subtype name that is not a
 * {@link Subtype} constant makes {@code Subtype.valueOf} throw {@link
 * IllegalArgumentException}, which is deliberately left to propagate to the caller.
 */
public void readFromSheets() {
    // NOTE(review): updateAndReadSubtypeMap() declares IOException and
    // GeneralSecurityException, but this signature as shown has no throws clause;
    // confirm against the full file.
    final var rows = ErrorSubtypeClient.updateAndReadSubtypeMap();
    if (rows == null) {
        // The client reports an empty sheet as null; there is nothing to load.
        return;
    }
    for (final var row : rows) {
        if (row == null || row.size() != 2) {
            continue;
        }
        final Object subtypeCell = row.get(0);
        final Object urlCell = row.get(1);
        if (subtypeCell == null || urlCell == null) {
            continue;
        }
        final String s = subtypeCell.toString();
        final String u = urlCell.toString();
        if (s.isBlank() || u.isBlank()) {
            continue;
        }
        final Subtype t = Subtype.valueOf(s);
        urlList.add(u);
        map.put(t, u);
    }
}

/**
* Get the default instance.
*
Expand Down

0 comments on commit 5d3ccd1

Please sign in to comment.