public static List<String> getImgStr(String htmlStr){ String img=""; Pattern p_image; Matcher m_image; List<String> pics = new ArrayList<String>(); String regEx_img = "<img.*src=(.*?)[^>]*?>"; //图片链接地址 p_image = Pattern.compile (regEx_img,Pattern.CASE_INSENSITIVE); m_image = p_image.matcher(htmlStr); while(m_image.find()){ img = img + "," + m_image.group(); Matcher m = Pattern.compile("src=\"?(.*?) (\"|>|\\s+)").matcher(img); //匹配src while(m.find()){ pics.add(m.group(1)); } } return pics; }
public static final Pattern PATTERN = Pattern.compile("<img\\s+(?:[^>]*)src\\s*=\\s*([^>]+)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); public static List getImgSrc(String html) { Matcher matcher = PATTERN.matcher(html); List list = new ArrayList(); while (matcher.find()) { String group = matcher.group(1); if (group == null) { continue; } // 这里可能还需要更复杂的判断,用以处理src="...."内的一些转义符 if (group.startsWith("'")) { list.add(group.substring(1, group.indexOf("'", 1))); } else if (group.startsWith("\"")) { list.add(group.substring(1, group.indexOf("\"", 1))); } else { list.add(group.split("\\s")[0]); } } return list; }
留言列表