# Strip the dollar sign and the thousands separators from a currency string.
# A character class needs no backslash escaping, so the pattern reads the same
# after passing through the shell quoting and the Hive string literal.
hive -e 'select regexp_replace("$60,825.48", "[$,]", "");'
OK
60825.48
Time taken: 3.912 seconds, Fetched: 1 row(s)
# Keep only digits and the decimal point by deleting every other character.
hive -e 'select regexp_replace("$60,825.48", "[^0-9.]", "");'
60825.48
Time taken: 2.249 seconds, Fetched: 1 row(s)
To remove a substring from a string, replace it with the empty string: REGEXP_REPLACE does this for whole substrings or patterns, while the TRANSLATE function does it character by character. For your query it would become this:
-- Rebuild vp_hiphop from vp_nlp_protext_males, keeping userid and ntext and
-- adding ntext1, a copy of ntext in which two phrases are joined into single
-- tokens: hip hop becomes hiphop and rock music becomes rockmusic.
-- The if exists clause keeps the drop from failing when the table has not
-- been created yet, so the script can be rerun safely.
drop table if exists vp_hiphop;
create table vp_hiphop as
select
    userid,
    ntext,
    regexp_replace(
        regexp_replace(ntext, 'hip hop', 'hiphop'),
        'rock music', 'rockmusic'
    ) as ntext1
from vp_nlp_protext_males;
-- Rebuild vp_hiphop from vp_nlp_protext_males, keeping userid and ntext and
-- adding ntext1, a copy of ntext with every space character removed.
-- NOTE(review): translate maps single characters, not substrings, so this
-- deletes all spaces in ntext rather than only the one inside a phrase such
-- as hip hop. Confirm that is intended. Otherwise use regexp_replace on the
-- specific phrases.
-- The if exists clause keeps the drop from failing when the table has not
-- been created yet, so the script can be rerun safely.
drop table if exists vp_hiphop;
create table vp_hiphop as
select
    userid,
    ntext,
    translate(ntext, ' ', '') as ntext1
from vp_nlp_protext_males;
Comments
Post a Comment