@@ -78,7 +78,13 @@ def __getitem__(self, idx: int) -> Tuple[str, str]:
78
78
79
79
tokens = row ["code_tokens" ]
80
80
body_tokens = tokens [tokens .index (fn_name ) + 2 :]
81
- fn_body_tokens = body_tokens [body_tokens .index ("{" ) + 1 : len (body_tokens ) - 1 ]
81
+ try :
82
+ fn_body_tokens = body_tokens [
83
+ body_tokens .index ("{" ) + 1 : len (body_tokens ) - 1
84
+ ]
85
+ except ValueError as ve : # '{' might be missing
86
+ logging .error ("'%s' fn body extraction failed: %s" , body_tokens , ve )
87
+ fn_body_tokens = None
82
88
83
89
return (fn_name , fn_body , fn_body_tokens )
84
90
@@ -91,6 +97,7 @@ def __len__(self) -> int:
91
97
from functools import lru_cache
92
98
from typing import List
93
99
100
+
94
101
def split_camelcase (camel_case_identifier : str ) -> List [str ]:
95
102
"""
96
103
Split camelCase identifiers.
@@ -158,7 +165,13 @@ def main(args: Namespace) -> None:
158
165
for fn_name , fn_body , fn_body_tokens in dataset :
159
166
if not fn_name or not fn_body :
160
167
continue
161
- src = " " .join (fn_body_tokens ) if args .token_level_sources else fn_body
168
+
169
+ if args .token_level_sources :
170
+ if not fn_body_tokens :
171
+ continue
172
+ src = " " .join (fn_body_tokens ).replace ("\n " , args .newline )
173
+ else :
174
+ src = fn_body
162
175
163
176
if args .word_level_targets :
164
177
tgt = fn_name
0 commit comments