Skip to content

Commit

Permalink
Allow comparison between detectors
Browse files Browse the repository at this point in the history
Support a --compare flag for the ./build report command, which seeds
every detector with the same subset of languages that all of them support
  • Loading branch information
russcam committed May 22, 2024
1 parent 5f6db10 commit 2581492
Show file tree
Hide file tree
Showing 187 changed files with 691 additions and 1,585 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ riderModule.iml
/_ReSharper.Caches/

.idea
.vs
*.sln.DotSettings.User

nuget
Expand Down
36 changes: 26 additions & 10 deletions build/Build/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,23 @@

var language = new Option<string[]>(["--language"], "languages to generate an accuracy report for")
{
Arity = ArgumentArity.ZeroOrMore
Arity = ArgumentArity.ZeroOrMore,
};

var detector = new Option<string[]>(["--implementation"], "implementations to generate an accuracy report for")
{
Arity = ArgumentArity.ZeroOrMore,
};
detector.FromAmong("Lingua", "NTextCat", "LanguageDetection");

var compare = new Option<bool>("--compare",
"whether implementations should use the same subset of supported languages when generating an accuracy report");

var cmd = new RootCommand
{
language,
detector,
compare,
new Argument<string[]>("targets")
{
Description =
Expand Down Expand Up @@ -90,7 +95,7 @@
{
var filter = new StringBuilder();
var languages = cmdLine.GetValueForOption(language);
if (languages is not null)
if (languages?.Length > 0)
{
foreach (var l in languages)
{
Expand All @@ -104,23 +109,34 @@
}

var detectors = cmdLine.GetValueForOption(detector);
if (detectors is not null)
if (detectors?.Length > 0)
{
foreach (var d in detectors)
if (filter.Length > 0)
filter.Append(" & ");

filter.Append('(');

for (var index = 0; index < detectors.Length; index++)
{
if (filter.Length > 0)
filter.Append(" & ");
if (index > 0)
filter.Append(" | ");

var d = detectors[index];
filter.Append("(FullyQualifiedName~.");
filter.Append(d);
filter.Append(')');
}

filter.Append(')');
}

Run("dotnet",
filter.Length > 0
? $"test tests/Lingua.AccuracyReport.Tests -c Release --no-build --filter \"{filter}\""
: "test tests/Lingua.AccuracyReport.Tests -c Release --no-build");
var additionalArgs = new StringBuilder();
if (cmdLine.GetValueForOption(compare))
additionalArgs.Append(" --environment TEST_COMPARE=\"true\"");
if (filter.Length > 0)
additionalArgs.Append($" --filter \"{filter}\" --environment TEST_FILTER=\"{filter}\"");

Run("dotnet", $"test tests/Lingua.AccuracyReport.Tests -c Release{additionalArgs}");

CombinedAccuracyReport.Create();
});
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
using System.Runtime.InteropServices;
using FluentAssertions;
using Xunit;
using static Lingua.Language;

namespace Lingua.AccuracyReport.Tests.Comparison;

public class ComparisonTests
Expand Down
2 changes: 2 additions & 0 deletions tests/Lingua.AccuracyReport.Tests/GlobalUsings.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
global using Xunit;
global using static Lingua.Language;
Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
using Xunit;
using static Lingua.Language;

namespace Lingua.AccuracyReport.Tests.LanguageDetection;

public class AfrikaansDetectionAccuracyReport(LanguageDetectionStatistics<LanguageDetectionLanguageDetectorFactory> statistics)
: LanguageDetectionDetectionAccuracyReport(Afrikaans, statistics),
IClassFixture<LanguageDetectionStatistics<LanguageDetectionLanguageDetectorFactory>>
{
[Theory(DisplayName = "single word detection")]
[SingleWordReportTheory(Implementation.LanguageDetection, Afrikaans)]
[SingleWordData(Afrikaans)]
public override void SingleWordsAreIdentifiedCorrectly(string singleWord) =>
ComputeSingleWordStatistics(singleWord);

[Theory(DisplayName = "word pair detection")]
[WordPairsReportTheory(Implementation.LanguageDetection, Afrikaans)]
[WordPairsData(Afrikaans)]
public override void WordPairsAreIdentifiedCorrectly(string wordPair) =>
ComputeWordPairStatistics(wordPair);

[Theory(DisplayName = "sentence detection")]
[SentenceReportTheory(Implementation.LanguageDetection, Afrikaans)]
[SentenceData(Afrikaans)]
public override void EntireSentencesAreIdentifiedCorrectly(string sentence) =>
ComputeSentenceStatistics(sentence);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
using Xunit;
using static Lingua.Language;

namespace Lingua.AccuracyReport.Tests.LanguageDetection;

public class AlbanianDetectionAccuracyReport(LanguageDetectionStatistics<LanguageDetectionLanguageDetectorFactory> statistics)
: LanguageDetectionDetectionAccuracyReport(Albanian, statistics),
IClassFixture<LanguageDetectionStatistics<LanguageDetectionLanguageDetectorFactory>>
{
[Theory(DisplayName = "single word detection")]
[SingleWordReportTheory(Implementation.LanguageDetection, Albanian)]
[SingleWordData(Albanian)]
public override void SingleWordsAreIdentifiedCorrectly(string singleWord) =>
ComputeSingleWordStatistics(singleWord);

[Theory(DisplayName = "word pair detection")]
[WordPairsReportTheory(Implementation.LanguageDetection, Albanian)]
[WordPairsData(Albanian)]
public override void WordPairsAreIdentifiedCorrectly(string wordPair) =>
ComputeWordPairStatistics(wordPair);

[Theory(DisplayName = "sentence detection")]
[SentenceReportTheory(Implementation.LanguageDetection, Albanian)]
[SentenceData(Albanian)]
public override void EntireSentencesAreIdentifiedCorrectly(string sentence) =>
ComputeSentenceStatistics(sentence);
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
using Xunit;
using static Lingua.Language;

namespace Lingua.AccuracyReport.Tests.LanguageDetection;

public class ArabicDetectionAccuracyReport(LanguageDetectionStatistics<LanguageDetectionLanguageDetectorFactory> statistics)
: LanguageDetectionDetectionAccuracyReport(Arabic, statistics),
IClassFixture<LanguageDetectionStatistics<LanguageDetectionLanguageDetectorFactory>>
{
[Theory(DisplayName = "single word detection")]
[SingleWordReportTheory(Implementation.LanguageDetection, Arabic)]
[SingleWordData(Arabic)]
public override void SingleWordsAreIdentifiedCorrectly(string singleWord) =>
ComputeSingleWordStatistics(singleWord);

[Theory(DisplayName = "word pair detection")]
[WordPairsReportTheory(Implementation.LanguageDetection, Arabic)]
[WordPairsData(Arabic)]
public override void WordPairsAreIdentifiedCorrectly(string wordPair) =>
ComputeWordPairStatistics(wordPair);

[Theory(DisplayName = "sentence detection")]
[SentenceReportTheory(Implementation.LanguageDetection, Arabic)]
[SentenceData(Arabic)]
public override void EntireSentencesAreIdentifiedCorrectly(string sentence) =>
ComputeSentenceStatistics(sentence);
Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
using Xunit;
using static Lingua.Language;

namespace Lingua.AccuracyReport.Tests.LanguageDetection;

public class BengaliDetectionAccuracyReport(LanguageDetectionStatistics<LanguageDetectionLanguageDetectorFactory> statistics)
: LanguageDetectionDetectionAccuracyReport(Bengali, statistics),
IClassFixture<LanguageDetectionStatistics<LanguageDetectionLanguageDetectorFactory>>
{
[Theory(DisplayName = "single word detection")]
[SingleWordReportTheory(Implementation.LanguageDetection, Bengali)]
[SingleWordData(Bengali)]
public override void SingleWordsAreIdentifiedCorrectly(string singleWord) =>
ComputeSingleWordStatistics(singleWord);

[Theory(DisplayName = "word pair detection")]
[WordPairsReportTheory(Implementation.LanguageDetection, Bengali)]
[WordPairsData(Bengali)]
public override void WordPairsAreIdentifiedCorrectly(string wordPair) =>
ComputeWordPairStatistics(wordPair);

[Theory(DisplayName = "sentence detection")]
[SentenceReportTheory(Implementation.LanguageDetection, Bengali)]
[SentenceData(Bengali)]
public override void EntireSentencesAreIdentifiedCorrectly(string sentence) =>
ComputeSentenceStatistics(sentence);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
using Xunit;
using static Lingua.Language;

namespace Lingua.AccuracyReport.Tests.LanguageDetection;

public class BokmalDetectionAccuracyReport(LanguageDetectionStatistics<LanguageDetectionLanguageDetectorFactory> statistics)
: LanguageDetectionDetectionAccuracyReport(Bokmal, statistics),
IClassFixture<LanguageDetectionStatistics<LanguageDetectionLanguageDetectorFactory>>
{
[Theory(DisplayName = "single word detection")]
[SingleWordReportTheory(Implementation.LanguageDetection, Bokmal)]
[SingleWordData(Bokmal)]
public override void SingleWordsAreIdentifiedCorrectly(string singleWord) =>
ComputeSingleWordStatistics(singleWord);

[Theory(DisplayName = "word pair detection")]
[WordPairsReportTheory(Implementation.LanguageDetection, Bokmal)]
[WordPairsData(Bokmal)]
public override void WordPairsAreIdentifiedCorrectly(string wordPair) =>
ComputeWordPairStatistics(wordPair);

[Theory(DisplayName = "sentence detection")]
[SentenceReportTheory(Implementation.LanguageDetection, Bokmal)]
[SentenceData(Bokmal)]
public override void EntireSentencesAreIdentifiedCorrectly(string sentence) =>
ComputeSentenceStatistics(sentence);
Expand Down

This file was deleted.

Loading

0 comments on commit 2581492

Please sign in to comment.