Browse Source

Fix #4 - rectify parsing of elements and text to render punctuation and parentheses correctly

Daniel Sheffield 2 years ago
parent
commit
492db7ca77
3 changed files with 52 additions and 39 deletions
  1. 1 1
      pom.xml
  2. 40 35
      src/main/java/PlJavaJSword.java
  3. 11 3
      src/test/java/TestPlJavaJSword.java

+ 1 - 1
pom.xml

@@ -25,7 +25,7 @@
   -->
   
   <properties>
-    <project.build.sourceEncoding>US-ASCII</project.build.sourceEncoding>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
   </properties>
 
   <!-- Here's where you say your project depends on a pljava-api version. -->

+ 40 - 35
src/main/java/PlJavaJSword.java

@@ -35,7 +35,9 @@ public class PlJavaJSword {
   private static final String DEFAULT_VERSIFICATION = "KJV";
   private static final Versification kjv = Versifications.instance().getVersification(DEFAULT_VERSIFICATION);
   private static final Books BOOKS = Books.installed();
-  private static final HashSet<Character> PUNCTUATION = new HashSet<Character>(Arrays.asList('.', ',', ';', ':', '?'));
+  private static final HashSet<Character> PUNCTUATION = new HashSet<Character>(Arrays.asList('.', ',', ';', ':', '?', '!'));
+  private static final HashSet<Character> OPEN_PARENTHESES = new HashSet<Character>(Arrays.asList('[', '('));
+  private static final HashSet<Character> CLOSE_PARENTHESES = new HashSet<Character>(Arrays.asList(']', ')'));
   private static final Logger logger = LoggerFactory.getLogger(PlJavaJSword.class);
 
   @Function(onNullInput=RETURNS_NULL, effects=IMMUTABLE, trust=SANDBOXED)
@@ -55,57 +57,60 @@ public class PlJavaJSword {
       return null;
     }
   }
-  private static String parseText(String text){
-    String t = text.trim();
-    if (t.isEmpty() || PUNCTUATION.contains(t.charAt(0))){
-      return t;
-    } else {
-      return " " + t;
-    }
-  }
-  private static String parseVerse(Iterator<Content> content){
-    String verse = "";
-    while (content.hasNext()){
-      Content c = content.next();
-      switch (c.getCType()){
-        case Element:
-          Element e = (Element)c;
-          switch (e.getName()){
-            case "note":
-              break;
-            default:
-              logger.info(e.getName() + " " + e.getValue());
-              verse += " " + parseContent(e.getContent().listIterator());
-          }
-          break;
-        case Text:
-          verse += parseText(c.getValue());
-          break;
+  private static String join(Iterator<String> parts){
+    String ret = "";
+    while (parts.hasNext()){
+      String part = parts.next().trim();
+      if (part.isEmpty()){
+          continue;
+      }
+      if (ret.isEmpty()){
+        ret += part;
+      }
+      else if (OPEN_PARENTHESES.contains(ret.charAt(ret.length()-1))){
+        ret += part;
+      }
+      else if (CLOSE_PARENTHESES.contains(part.charAt(0))){
+        ret += part;
+      }
+      else if (PUNCTUATION.contains(part.charAt(0))){
+        ret += part;
+      }
+      else {
+        ret += " " + part;
       }
     }
-    return verse.trim();
+    return ret;
   }
   private static String parseContent(Iterator<Content> content){
     List<String> ret = new ArrayList<String>();
     while (content.hasNext()){
       Content c = content.next();
+      String part = null;
       switch(c.getCType()){
         case Element:
           Element e = (Element)c;
+          logger.info(e.getName() + " " + e.getValue());
           switch (e.getName()){
-            case "verse":
-            case "q":
-              String part = parseVerse(e.getContent().listIterator());
-              ret.add(part);
+            case "title":
+            case "note":
+              break;
             default:
-              logger.warn(e.getName() + " " + e.getValue());
+              part = parseContent(e.getContent().listIterator());
+              ret.add(part);
           }
           break;
         case Text:
-          ret.add(parseText(c.getValue()).trim());
+          logger.info("Text: " + c.getValue());
+          part = c.getValue().trim();
+          ret.add(part);
+          break;
+        default:
+          logger.info(c.getCType() + " " + c.getValue());
+          break;
       }
     }
-    return String.join(" ", ret);
+    return join(ret.listIterator());
   }
   @Function(onNullInput=RETURNS_NULL, effects=IMMUTABLE, trust=SANDBOXED)
   public static String getText(String translation, String reference) throws BookException {

+ 11 - 3
src/test/java/TestPlJavaJSword.java

@@ -5,10 +5,15 @@ import org.junit.jupiter.api.Test;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
 public class TestPlJavaJSword {
+  private static final String KJV_GEN_1_1 = "In the beginning God created the heaven and the earth.";
+  private static final String MKJV_GEN_1_1 = "In the beginning God created the heaven and the earth.";
   private static final String KJV_GEN_1_1_3 = "In the beginning God created the heaven and the earth. And the earth was without form, and void; and darkness was upon the face of the deep. And the Spirit of God moved upon the face of the waters. And God said, Let there be light: and there was light.";
   private static final String MKJV_GEN_1_1_3 = "In the beginning God created the heaven and the earth. And the earth was without form and empty. And darkness was on the face of the deep. And the Spirit of God moved on the face of the waters. And God said, Let there be light. And there was light.";
   private static final String ESV_GEN_1_1 = "In the beginning, God created the heavens and the earth.";
   private static final String ESV_MT_6_33 = "But seek first the kingdom of God and his righteousness, and all these things will be added to you.";
+  private static final String ESV_PSALM_150 = "Praise the Lord! Praise God in his sanctuary; praise him in his mighty heavens! Praise him for his mighty deeds; praise him according to his excellent greatness! Praise him with trumpet sound; praise him with lute and harp! Praise him with tambourine and dance; praise him with strings and pipe! Praise him with sounding cymbals; praise him with loud clashing cymbals! Let everything that has breath praise the Lord! Praise the Lord!";
+  private static final String ESV_2COR_12_19 = "But he said to me, My grace is sufficient for you, for my power is made perfect in weakness. Therefore I will boast all the more gladly of my weaknesses, so that the power of Christ may rest upon me.";
+  private static final String KJV_ACTS_1_8 = "But ye shall receive power, after that the Holy Ghost is come upon you: and ye shall be witnesses unto me both in Jerusalem, and in all Judæa, and in Samaria, and unto the uttermost part of the earth.";
   @Test
   public void testIsValidVerseValidVerse(){
     assertEquals(true, PlJavaJSword.isValidVerse("Gen 1:1"));
@@ -19,7 +24,7 @@ public class TestPlJavaJSword {
   }
   @Test
   public void testText() throws BookException {
-    assertEquals("In the beginning God created the heaven and the earth.", PlJavaJSword.getText("KJV", "Genesis 1:1"));
+    assertEquals(KJV_GEN_1_1, PlJavaJSword.getText("KJV", "Genesis 1:1"));
   }
   @Test
   public void testTextParse() throws BookException {
@@ -34,12 +39,15 @@ public class TestPlJavaJSword {
     assertEquals(MKJV_GEN_1_1_3, PlJavaJSword.getText("MKJV", "Genesis 1:1-3"));
   }
   @Test
-  public void testTextParseMore() throws BookException {
+  public void testTextParseElementsAndPunctuation() throws BookException {
     assertEquals(ESV_MT_6_33, PlJavaJSword.getText("ESV", "Mt. 6:33"));
+    assertEquals(ESV_PSALM_150, PlJavaJSword.getText("ESV", "Ps. 150"));
+    assertEquals(ESV_2COR_12_19, PlJavaJSword.getText("ESV", "2 Cor. 12:9"));
+    assertEquals(KJV_ACTS_1_8, PlJavaJSword.getText("KJV", "Acts 1:8"));
   }
   @Test
   public void testDefaultText() throws BookException {
-    assertEquals("In the beginning God created the heaven and the earth.", PlJavaJSword.getDefaultText("Genesis 1:1"));
+    assertEquals(MKJV_GEN_1_1, PlJavaJSword.getDefaultText("Genesis 1:1"));
   }
   @Test
   public void testDefaultTextMany() throws BookException {