
Commit 1c6434b

aws-rafams (Rafael Mosca) and mergify[bot] authored
chore(bedrock): add nori identifiers (#1042)
Co-authored-by: Rafael Mosca <rafams@amazon.es>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
1 parent 275dc92 commit 1c6434b

File tree

3 files changed: +102 −0 lines changed

apidocs/namespaces/opensearchserverless/enumerations/TokenFilterType.md

Lines changed: 42 additions & 0 deletions

@@ -6,44 +6,86 @@

# Enumeration: TokenFilterType

TokenFilterType defines the available token filters for text analysis.
Token filters process tokens after they have been created by the tokenizer.
They can modify, add, or remove tokens based on specific rules.

## Enumeration Members

### CJK\_WIDTH

> **CJK\_WIDTH**: `"cjk_width"`

Normalizes CJK width differences by converting all characters to their fullwidth or halfwidth variants

***

### ICU\_FOLDING

> **ICU\_FOLDING**: `"icu_folding"`

Applies Unicode folding rules for better text matching

***

### JA\_STOP

> **JA\_STOP**: `"ja_stop"`

Removes Japanese stop words from text

***

### KUROMOJI\_BASEFORM

> **KUROMOJI\_BASEFORM**: `"kuromoji_baseform"`

Converts inflected Japanese words to their base form

***

### KUROMOJI\_PART\_OF\_SPEECH

> **KUROMOJI\_PART\_OF\_SPEECH**: `"kuromoji_part_of_speech"`

Tags words with their parts of speech in Japanese text analysis

***

### KUROMOJI\_STEMMER

> **KUROMOJI\_STEMMER**: `"kuromoji_stemmer"`

Reduces Japanese words to their stem form

***

### LOWERCASE

> **LOWERCASE**: `"lowercase"`

Converts all characters to lowercase

***

### NORI\_NUMBER

> **NORI\_NUMBER**: `"nori_number"`

Normalizes Korean numbers to regular Arabic numbers

***

### NORI\_PART\_OF\_SPEECH

> **NORI\_PART\_OF\_SPEECH**: `"nori_part_of_speech"`

Tags words with their parts of speech in Korean text analysis

***

### NORI\_READINGFORM

> **NORI\_READINGFORM**: `"nori_readingform"`

Converts Korean text to its reading form
apidocs/namespaces/opensearchserverless/enumerations/TokenizerType.md

Lines changed: 12 additions & 0 deletions

@@ -12,8 +12,20 @@

> **ICU\_TOKENIZER**: `"icu_tokenizer"`

ICU tokenizer is used for Unicode text segmentation based on UAX #29 rules

***

### KUROMOJI\_TOKENIZER

> **KUROMOJI\_TOKENIZER**: `"kuromoji_tokenizer"`

Kuromoji tokenizer is used for Japanese text analysis and segmentation

***

### NORI\_TOKENIZER

> **NORI\_TOKENIZER**: `"nori_tokenizer"`

Nori tokenizer is used for Korean text analysis and segmentation
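
Each tokenizer targets a different script, so a caller typically selects one per input language. A hypothetical helper (not part of the library) mapping a language tag to the matching member, assuming the enums are exported under the `opensearchserverless` namespace of `@cdklabs/generative-ai-cdk-constructs` as the doc paths suggest:

```ts
import { opensearchserverless } from '@cdklabs/generative-ai-cdk-constructs';

// Hypothetical helper: choose a tokenizer for a BCP 47 language tag.
function tokenizerFor(lang: string): opensearchserverless.TokenizerType {
  switch (lang) {
    case 'ja':
      return opensearchserverless.TokenizerType.KUROMOJI_TOKENIZER; // Japanese
    case 'ko':
      return opensearchserverless.TokenizerType.NORI_TOKENIZER;     // Korean, new in this commit
    default:
      return opensearchserverless.TokenizerType.ICU_TOKENIZER;      // generic Unicode (UAX #29)
  }
}
```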

src/cdk-lib/opensearchserverless/analysis-plugins.ts

Lines changed: 48 additions & 0 deletions

@@ -19,16 +19,64 @@ export enum CharacterFilterType {

```ts
// Also see the following link for more information regarding supported plugins:
// https://docs.aws.amazon.com/opensearch-service/latest/developerguide/serverless-genref.html#serverless-plugins
export enum TokenizerType {
  /**
   * Kuromoji tokenizer is used for Japanese text analysis and segmentation
   */
  KUROMOJI_TOKENIZER = 'kuromoji_tokenizer',
  /**
   * ICU tokenizer is used for Unicode text segmentation based on UAX #29 rules
   */
  ICU_TOKENIZER = 'icu_tokenizer',
  /**
   * Nori tokenizer is used for Korean text analysis and segmentation
   */
  NORI_TOKENIZER = 'nori_tokenizer',
}

/**
 * TokenFilterType defines the available token filters for text analysis.
 * Token filters process tokens after they have been created by the tokenizer.
 * They can modify, add, or remove tokens based on specific rules.
 */
export enum TokenFilterType {
  /**
   * Converts inflected Japanese words to their base form
   */
  KUROMOJI_BASEFORM = 'kuromoji_baseform',
  /**
   * Tags words with their parts of speech in Japanese text analysis
   */
  KUROMOJI_PART_OF_SPEECH = 'kuromoji_part_of_speech',
  /**
   * Reduces Japanese words to their stem form
   */
  KUROMOJI_STEMMER = 'kuromoji_stemmer',
  /**
   * Normalizes CJK width differences by converting all characters to their fullwidth or halfwidth variants
   */
  CJK_WIDTH = 'cjk_width',
  /**
   * Removes Japanese stop words from text
   */
  JA_STOP = 'ja_stop',
  /**
   * Converts all characters to lowercase
   */
  LOWERCASE = 'lowercase',
  /**
   * Applies Unicode folding rules for better text matching
   */
  ICU_FOLDING = 'icu_folding',
  /**
   * Tags words with their parts of speech in Korean text analysis
   */
  NORI_PART_OF_SPEECH = 'nori_part_of_speech',
  /**
   * Converts Korean text to its reading form
   */
  NORI_READINGFORM = 'nori_readingform',
  /**
   * Normalizes Korean numbers to regular Arabic numbers
   */
  NORI_NUMBER = 'nori_number',
}
```
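
For consumers, the effect of the commit is that a Korean analysis setup can now be described entirely with typed identifiers instead of raw strings. A sketch of an analyzer description built from the new members, assuming a shape of one tokenizer plus an ordered token-filter list (the surrounding construct props may name these fields differently):

```ts
import { opensearchserverless } from '@cdklabs/generative-ai-cdk-constructs';

const { TokenizerType, TokenFilterType } = opensearchserverless;

// Assumed shape: one tokenizer plus an ordered list of token filters.
const koreanAnalysis = {
  tokenizer: TokenizerType.NORI_TOKENIZER,
  tokenFilters: [
    TokenFilterType.NORI_PART_OF_SPEECH,
    TokenFilterType.NORI_READINGFORM,
    TokenFilterType.NORI_NUMBER,
    TokenFilterType.LOWERCASE,
  ],
};
```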
