public static List<String> getImgStr(String htmlStr){          String img="";          Pattern p_image;          Matcher m_image;          List<String> pics = new ArrayList<String>();         String regEx_img = "<img.*src=(.*?)[^>]*?>"; //图片链接地址          p_image = Pattern.compile                (regEx_img,Pattern.CASE_INSENSITIVE);         m_image = p_image.matcher(htmlStr);       while(m_image.find()){              img = img + "," + m_image.group();              Matcher m  = Pattern.compile("src=\"?(.*?)                         (\"|>|\\s+)").matcher(img); //匹配src           while(m.find()){              pics.add(m.group(1));           }       }             return pics;      }   

 

public static final Pattern PATTERN  = Pattern.compile("<img\\s+(?:[^>]*)src\\s*=\\s*([^>]+)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);                         public static List getImgSrc(String html) {                Matcher matcher = PATTERN.matcher(html);                List list = new ArrayList();                while (matcher.find()) {                  String group = matcher.group(1);                  if (group == null) {                    continue;                  }                  // 这里可能还需要更复杂的判断,用以处理src="...."内的一些转义符                  if (group.startsWith("'")) {                    list.add(group.substring(1, group.indexOf("'", 1)));                  } else if (group.startsWith("\"")) {                    list.add(group.substring(1, group.indexOf("\"", 1)));                  } else {                    list.add(group.split("\\s")[0]);                  }                }                return list;              }

arrow
arrow
    全站熱搜

    戮克 發表在 痞客邦 留言(0) 人氣()