duzx16 2 роки тому
батько
коміт
59f6c9da28

+ 1 - 1
tasks/cot/gsm8k_prompt.txt

@@ -13,4 +13,4 @@ A: There were originally 9 computers. For each of 4 days, 5 more computers were
 Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?
 A: Michael started with 58 golf balls. After losing 23 on tuesday, he had 58 - 23 = 35. After losing 2 more, he had 35 - 2 = 33 golf balls. The answer is 33.
 Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?
-A: Olivia had 23 dollars. 5 bagels for 3 dollars each will be 5 x 3 = 15 dollars. So she has 23 - 15 dollars left. 23 - 15 is 8. The answer is 8
+A: Olivia had 23 dollars. 5 bagels for 3 dollars each will be 5 x 3 = 15 dollars. So she has 23 - 15 dollars left. 23 - 15 is 8. The answer is 8.

+ 14 - 0
tasks/cot/lastletter.yaml

@@ -0,0 +1,14 @@
+name: 'lastletter'
+type: 'gen'
+module: "tasks.cot.task.ChainOfThoughtTask"
+path: 'symbolic'
+file-pattern:
+  test: "lastletter.jsonl"
+sampling_strategy: "BaseStrategy"
+prompt_path: "tasks/cot/lastletter_prompt.txt"
+deterministic: true
+max_gen_length: 64
+use_task_mask: true
+save_prediction: true
+chain_of_thought: true
+micro_batch_size: 4

+ 2 - 3
tasks/cot/lastletter_prompt.txt

@@ -1,9 +1,8 @@
 Q: Take the last letters of the words in "Elon Musk" and concatenate them.
-A: The last letter of "Elon" is "n". The last letter of "Musk" is "k'. Concatenating them is "nk". The answer is nk
+A: The last letter of "Elon" is "n". The last letter of "Musk" is "k". Concatenating them is "nk". The answer is nk.
 Q: Take the last letters of the words in "Larry Page" and concatenate them.
 A: The last letter of "Larry" is "y". The last letter of "Page" is "e". Concatenating them is "ye". The answer is ye.
 Q: Take the last letters of the words in "Sergey Brin" and concatenate them.
-A: The last letter of "Sergey" is "y". The last letter of "Brin" is "n". Concatenating them is "yn". The answer is
-yn.
+A: The last letter of "Sergey" is "y". The last letter of "Brin" is "n". Concatenating them is "yn". The answer is yn.
 Q: Take the last letters of the words in "Bill Gates" and concatenate them.
 A: The last letter of "Bill" is "l". The last letter of "Gates" is "s". Concatenating them is "ls". The answer is ls.

+ 2 - 2
tasks/cot/sports_prompt.txt

@@ -1,5 +1,5 @@
-Q: Is the following sentence plausible? "Kyle Palmier was called for slashing."
-A: Kyle Palmier is a hockey player. Being called for slashing is part of hockey. So the answer is yes.
+Q: Is the following sentence plausible? "Kyle Palmieri was called for slashing."
+A: Kyle Palmieri is a hockey player. Being called for slashing is part of hockey. So the answer is yes.
 Q: Is the following sentence plausible? "Joao Moutinho caught the screen pass in the NFC championship."
 A: Joao Moutinho is a soccer player. The NFC championship is part of American football, not soccer. So the answer is no.
 Q: Is the following sentence plausible? "Carson Wentz set the pick and roll."

+ 22 - 1
tasks/cot/task.py

@@ -74,6 +74,17 @@ def extract_answer(prediction, task_name, chain_of_thought=True):
             answer = match.group(0)
         else:
             answer = "no"
+    elif task_name.startswith("lastletter"):
+        prediction = prediction.lower()
+        if chain_of_thought:
+            pattern = r'(?<=the answer is )[a-z]+'
+        else:
+            pattern = r'[a-z]+'
+        match = re.search(pattern, prediction)
+        if match:
+            answer = match.group(0)
+        else:
+            answer = ""
     else:
         raise NotImplementedError(task_name)
     return answer
@@ -111,7 +122,7 @@ class ChainOfThoughtDataset(GenerationTaskDataset):
 class GSM8KDataset(ChainOfThoughtDataset):
     def process_single_item(self, item, **kwargs):
         item["targets"] = item["answer"].split("####")[1].strip()
-        return super().process_single_item(item)
+        return super().process_single_item(item, **kwargs)
 
 
 class SportsDataset(ChainOfThoughtDataset):
@@ -128,6 +139,14 @@ class SportsDataset(ChainOfThoughtDataset):
             self.data.extend(self.process_single_item(item))
 
 
+class LastLetterDataset(ChainOfThoughtDataset):
+    def process_single_item(self, item, **kwargs):
+        first_name, last_name = item["first_name"], item["last_name"]
+        question = f'Take the last letters of the words in "{first_name} {last_name}" and concatenate them.'
+        item["question"] = question
+        return super().process_single_item(item, **kwargs)
+
+
 class ChainOfThoughtTask(GenerationTask):
     config: ChainOfThoughtConfig
 
@@ -155,6 +174,8 @@ class ChainOfThoughtTask(GenerationTask):
             return GSM8KDataset(os.path.join(self.config.path, relative_path), self.config)
         elif self.config.name.startswith("sports"):
             return SportsDataset(os.path.join(self.config.path, relative_path), self.config)
+        elif self.config.name.startswith("lastletter"):
+            return LastLetterDataset(os.path.join(self.config.path, relative_path), self.config)
         else:
             raise NotImplementedError