package com.test.parser;

import java.util.Iterator;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class testJsoup {

    public static void main(String[] args) {
        // online parser
        // http://try.jsoup.org/~LGB7rk_atM2roavV0d-czMt3J_g
        try {
            Document doc = Jsoup.connect("http://en.wikipedia.org/").get();
            // headlines from the "In the news" block (not used further below)
            Elements newsHeadlines = doc.select("#mp-itn b a");
            // Get first table
            Element table = doc.select("table").first();
            // Get td Iterator
            Iterator<Element> ite = table.select("td").iterator();
            // Print content
            int cnt = 0;
            while (ite.hasNext()) {
                cnt++;
                System.out.println("Value " + cnt + ": " + ite.next().text());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Thursday, May 28, 2015
[java] HTML parser using the jsoup lib
Complete project: https://github.com/cihm/JavaHtmlParser
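If you build it with Gradle, as in the other posts here, the jsoup dependency would look roughly like the following; the version is an assumption, use whatever is current:
dependencies {
    compile group: 'org.jsoup', name: 'jsoup', version: '1.8.2'
}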
Tuesday, May 26, 2015
[linux] In a crontab command, % means a line break
Today I set up new Java arguments for a cron job and then noticed the cron job was no longer running. After searching online I found it was the % character: in crontab it means a newline, so everything after it never reached my jar. Quoting the spec: A <percent-sign> character in this field shall be translated to a <newline>. Any character preceded by a <backslash> (including the '%') shall cause that character to be treated literally. Only the first line (up to a '%' or end-of-line) of the command field shall be executed by the command interpreter. The other lines shall be made available to the command as standard input.
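For example, escaping the % with a backslash keeps it literal, so a date argument can still reach the jar (a hypothetical crontab entry; the path and schedule are made up):
# run at 01:00 every day; \% keeps the percent signs literal
0 1 * * * java -jar /opt/app/myjob.jar --date=$(date +\%Y\%m\%d)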
Sunday, May 24, 2015
[gradle] Build a Gradle project without installing Gradle on Linux
Add the following to your build.gradle:
task wrapper(type: Wrapper) {
gradleVersion = '2.3'
}
Run gradle wrapper.
Then, in the project directory on your Linux machine, run ./gradlew build. If you want to install Gradle itself instead, see:
http://exponential.io/blog/2015/03/30/install-gradle-on-ubuntu-linux/
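Running gradle wrapper generates roughly the following files; commit them so the project can be built with ./gradlew on machines without Gradle (the exact layout can vary slightly between Gradle versions):
├── gradlew
├── gradlew.bat
└── gradle
    └── wrapper
        ├── gradle-wrapper.jar
        └── gradle-wrapper.properties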
[gradle] build jar
Add the following to your build.gradle:
jar {
baseName = 'solrGradleTest'
version = '0.1.0'
}
By default, the generated jar ends up under the project's build directory.
EX:
C:\Users\1409035\Documents\GitHub\SolrJava\build\libs\solrGradleTest-0.1.0.jar
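If you also want the jar to be runnable with java -jar, a Main-Class can be added to the manifest. A minimal sketch, assuming the main class is com.handler.SolrHandler from the Gradle post below:
jar {
    baseName = 'solrGradleTest'
    version = '0.1.0'
    manifest {
        attributes 'Main-Class': 'com.handler.SolrHandler'
    }
}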
Thursday, May 21, 2015
[gson] Read / write large files with the Gson streaming API
To read and write large JSON files without loading everything into memory at once, use the Gson streaming API (JsonReader / JsonWriter). The snippets below cover three cases:
read
write
read and write
read
File jsonInputFile = new File(path);
FileInputStream fisInput;
JsonReader reader;
try {
    fisInput = new FileInputStream(jsonInputFile);
    reader = new JsonReader(new InputStreamReader(fisInput, "UTF-8"));
    // Stream the whole array instead of loading the raw JSON string into memory
    ArrayList<Map> categoryALHM = gson.fromJson(reader, ArrayList.class);
    if (null == categoryALHM || categoryALHM.size() == 0) {
        log.info(path + ": is null or empty");
        return false;
    }
    reader.close();
    fisInput.close();
} catch (Exception e) {
    log.info(e.getMessage());
    return false;
}
write
File jsonFile = new File(originalDatpath);
FileOutputStream fos;
JsonWriter writer;
try {
    fos = new FileOutputStream(jsonFile);
    writer = new JsonWriter(new OutputStreamWriter(fos, "UTF-8"));
    writer.setIndent(" ");
    Map getCategoryMap = JsonTool.Json2Map(IbobarDataUtil.viewCategory());
    ArrayList<HashMap<String, Object>> getCategoryMapList =
            (ArrayList) getCategoryMap.get("list");
    log.info("original category:" + getCategoryMapList);
    gson.toJson(getCategoryMapList, ArrayList.class, writer);
    // Closing the writer flushes and closes the underlying stream
    writer.close();
    fos.close();
} catch (Exception e) {
    log.info(e.getMessage());
}
read and write
File jsonInputFile = new File(organizeDataPath);
File jsonOutputFile = new File(usingDatPath);
FileOutputStream fosOutput;
FileInputStream fisInput;
JsonWriter writer;
JsonReader reader;
try {
    fosOutput = new FileOutputStream(jsonOutputFile);
    writer = new JsonWriter(new OutputStreamWriter(fosOutput, "UTF-8"));
    writer.setIndent(" ");
    fisInput = new FileInputStream(jsonInputFile);
    reader = new JsonReader(new InputStreamReader(fisInput, "UTF-8"));
    writer.beginObject();
    reader.beginObject();
    // Copy the top-level object key by key, re-serializing each value
    while (reader.hasNext()) {
        String key = reader.nextName();
        List<Map> bookOrganizeList = gson.fromJson(reader, ArrayList.class);
        writer.name(key);
        gson.toJson(bookOrganizeList, ArrayList.class, writer);
    }
    writer.endObject();
    writer.close();
    fosOutput.close();
    reader.endObject();
    reader.close();
    fisInput.close();
} catch (Exception e) {
    log.info(e.getMessage());
}
Wednesday, May 20, 2015
[gradle] Install a Nexus server on Ubuntu
Download: sudo wget http://www.sonatype.org/downloads/nexus-2.1.1.war
Rename it: mv nexus-2.1.1.war nexus.war
Copy it under Tomcat: cp nexus.war /opt/apache-tomcat-7.0.61/webapps/
Open the following URL in a browser: http://192.168.22.148:8080/nexus
Default credentials: admin/admin123
Lesson learned: if you don't set any config after installing, sonatype-work and the config files get created under root/.
Set the relevant config: vim /opt/apache-tomcat-7.0.61/webapps/nexus/WEB-INF/plexus.properties
Change the following entry: nexus-work=/opt/nexus-repo (you can set whatever path you like)
Test whether an uploaded lib lands in the path configured above (using LeoLib as the example):
After logging in at http://192.168.22.148:8080/nexus, go to Repositories -> Releases -> Artifact Upload -> GAV Definition, choose GAV Parameters, and set Group: LeoLib, Artifact: LeoLib, Version: 1.0.
Check that the upload succeeded under /opt/nexus-repo/storage/releases/
How the build.gradle settings relate to the Nexus Artifact Upload: with GAV Parameters set to Group: LeoLib, Artifact: LeoLib, Version: 1.0, the dependencies section of your build.gradle becomes compile group:'LeoLib', name:'LeoLib', version:'1.0'
To look up the group, name, and version of a lib on Maven, use http://mvnrepository.com/
security.xml is only generated after you log in.
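To pull LeoLib from this Nexus server in a Gradle project, the repository section of build.gradle would look roughly like this (a sketch; the URL assumes the default Nexus 2 releases repository path on the host above):
repositories {
    maven {
        url "http://192.168.22.148:8080/nexus/content/repositories/releases"
    }
}
dependencies {
    compile group: 'LeoLib', name: 'LeoLib', version: '1.0'
}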
Sunday, May 10, 2015
[searchEngine] [java] Build a Solr index & search it
The complete project can be downloaded from my GitHub:
https://github.com/cihm/GradleAndSolr
SolrSearch.java
SolrBuildIndex.java
SolrSearch.java
package com.job;

import java.net.MalformedURLException;

import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocumentList;

import com.vo.SolrArgVo;

public class SolrSearch {

    public static Logger log = Logger.getLogger(SolrSearch.class.getName());

    public boolean solrSearch(SolrArgVo solrArgVo) throws MalformedURLException, SolrServerException {
        //HttpSolrServer solr = new HttpSolrServer("http://10.24.100.237:8080/solr/collection1");
        HttpSolrServer solr = new HttpSolrServer("http://192.168.22.148:8080/solr/collection1");
        SolrQuery query = new SolrQuery();
        // English terms are matched exactly; Chinese terms are matched fuzzily
        // query conditions
        query.setQuery("*:*"); // match everything; overridden by the keyword query below
        //query.setFilterQueries("name:" + "登基", "description:" + "登基", "channel:" + "ylib");
        String keyword = solrArgVo.getKetWord();
        String channel = solrArgVo.getChanneCode();
        query.addFilterQuery("channel:" + channel);
        query.setQuery("name:" + keyword + " OR description:" + keyword);
        //query.setQuery("name:"+"王道 AND currency:"+"NTD");
        // can be used to get books by category
        query.setRows(100); // number of rows returned; the default is 10
        //query.setQuery("title:國王");
        //query.setQuery("title: art");
        QueryResponse response = solr.query(query);
        SolrDocumentList results = response.getResults();
        System.out.println("NumFound=" + results.getNumFound());
        System.out.println("SIZE=" + results.size());
        //System.out.println(results);
        //System.out.println(results.get(0).get("title"));
        for (int i = 0; i < results.size(); ++i) {
            System.out.println("result " + i + "= " + results.get(i).get("id"));
            System.out.println("result " + i + "=" + results.get(i));
        }
        return true;
    }
}
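A minimal sketch of how SolrSearch might be invoked; the SolrArgVo setters are assumptions mirroring the getters used above:
package com.job;

import com.vo.SolrArgVo;

public class SearchMain {

    public static void main(String[] args) throws Exception {
        SolrArgVo vo = new SolrArgVo();
        vo.setKetWord("王道");      // hypothetical setter matching getKetWord()
        vo.setChanneCode("ylib");   // hypothetical setter matching getChanneCode()
        new SolrSearch().solrSearch(vo);
    }
}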
SolrBuildIndex.java
package com.job;

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.BinaryRequestWriter;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.common.SolrInputDocument;

import com.util.DataUtil;
import com.util.SolrConstants;

public class SolrBuildIndex {

    public static Logger log = Logger.getLogger(SolrBuildIndex.class.getName());

    public boolean solrBuildIndex() throws MalformedURLException, SolrServerException, IOException {
        HttpSolrServer server = new HttpSolrServer("http://192.168.22.148:8080/solr/collection1");
        // Clear the previously built index (delete all docs)
        server.deleteByQuery("*:*");
        // Use the binary request writer for better performance
        server.setRequestWriter(new BinaryRequestWriter());
        String datFilePath = "C:/Users/1409035/Desktop/FTP_server_backup/candelete/";
        //String datFilePath = PropertyLoader.getInstance().getValue(DAT_FILE_PATH_KEY);
        Map zinioMap = (HashMap) DataUtil.DeSerialization(datFilePath + "Zinio" + "-" + "getItemListByCategoryCode" + "-using" + ".dat");
        Map ylibMap = (HashMap) DataUtil.DeSerialization(datFilePath + "Ylib" + "-" + "getItemListByCategoryCode" + "-using" + ".dat");
        Map ibobarMap = (HashMap) DataUtil.DeSerialization(datFilePath + "Ibobar" + "-" + "getItemListByCategoryCode" + "-using" + ".dat");
        Map linkingMap = (HashMap) DataUtil.DeSerialization(datFilePath + "Linking" + "-" + "getItemListByCategoryCode" + "-using" + ".dat");
        Collection docs2 = new ArrayList();
        int k = 0;
        Iterator linkingIter = linkingMap.entrySet().iterator();
        while (linkingIter.hasNext()) {
            Map.Entry entry = (Map.Entry) linkingIter.next();
            ArrayList<HashMap> bookAL = (ArrayList<HashMap>) entry.getValue();
            for (HashMap hm : bookAL) {
                k++;
                SolrInputDocument doc = new SolrInputDocument();
                doc.addField("id", k);
                doc.addField("channel", "Linking");
                doc.addField("name", hm.get("name"));
                doc.addField("description", hm.get("description"));
                docs2.add(doc);
            }
        }
        Iterator ylibIter = ylibMap.entrySet().iterator();
        while (ylibIter.hasNext()) {
            Map.Entry entry = (Map.Entry) ylibIter.next();
            ArrayList<HashMap> bookAL = (ArrayList<HashMap>) entry.getValue();
            for (HashMap hm : bookAL) {
                k++;
                SolrInputDocument doc = new SolrInputDocument();
                doc.addField("id", k);
                doc.addField("channel", "Ylib");
                doc.addField("name", hm.get("name"));
                doc.addField("description", hm.get("description"));
                System.out.println(doc.toString());
                docs2.add(doc);
            }
        }
        Iterator zinioIter = zinioMap.entrySet().iterator();
        while (zinioIter.hasNext()) {
            Map.Entry entry = (Map.Entry) zinioIter.next();
            // Don't receive the value below as ALHM, or it throws an "ArrayList can't be cast to ALHM" error
            ArrayList<HashMap> bookAL = (ArrayList<HashMap>) entry.getValue();
            for (HashMap hm : bookAL) {
                k++;
                System.out.println(entry.getKey() + " " + hm.get("Title"));
                SolrInputDocument doc = new SolrInputDocument();
                doc.addField("id", k);
                doc.addField("channel", "Zinio");
                doc.addField("name", hm.get("name"));
                doc.addField("description", hm.get("description"));
                docs2.add(doc);
            }
        }
        Iterator ibobarIter = ibobarMap.entrySet().iterator();
        while (ibobarIter.hasNext()) {
            Map.Entry entry = (Map.Entry) ibobarIter.next();
            ArrayList<HashMap> bookAL = (ArrayList<HashMap>) entry.getValue();
            for (HashMap hm : bookAL) {
                k++;
                SolrInputDocument doc = new SolrInputDocument();
                doc.addField("id", k);
                doc.addField("channel", "Ibobar");
                doc.addField("name", hm.get("name"));
                doc.addField("description", hm.get("description"));
                docs2.add(doc);
            }
        }
        // Convert the ArrayList to XML format
        //String resultList = GeneralXmlPullParser.reverse(contentAL);
        System.out.println("=======");
        //System.out.println(docs2.toString());
        server.add(docs2);
        server.commit();
        server.optimize(true, true);
        System.out.println("finish");
        return true;
    }
}
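For reference, the collection1 schema.xml needs fields for the documents built above, roughly like the following (a sketch; the field types are assumptions based on the stock Solr example schema):
<field name="id" type="string" indexed="true" stored="true" required="true"/>
<field name="channel" type="string" indexed="true" stored="true"/>
<field name="name" type="text_general" indexed="true" stored="true"/>
<field name="description" type="text_general" indexed="true" stored="true"/>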
[gradle] Install Gradle & use it to build a project
On Windows
Installation
. Download Gradle
(gradle-2.3-all.zip)
. Unzip it to a path of your choice
(C:\Users\1409035\gradle-2.3)
. Add the bin directory to the PATH environment variable.
. To verify it works, run gradle -v on the command line.
==========================================
First project
. Create a project with the following directory structure:
├── build.gradle
└── src
    └── main
        ├── java
        │   └── com
        │       └── handler
        │           └── SolrHandler.java
        └── resources
            └── log4j.properties
. SolrHandler.java is as follows:
package com.handler;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class SolrHandler {

    static Log logger = LogFactory.getLog(SolrHandler.class);

    public static void main(String[] args) {
        logger.info("Hello World");
    }
}
. log4j.properties is as follows:
log4j.rootLogger=INFO, A1, A2
# A1 is set to be a ConsoleAppender
log4j.appender.A1 = org.apache.log4j.ConsoleAppender
log4j.appender.A1.layout = org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern = [solrJava.log.%d{yyyyMMdd-HH:mm}][%p][%C-%L] %m%n
# A2 is set to be a file appender
# produce a new log file every day
#log4j.appender.A2 = org.apache.log4j.FileAppender
log4j.appender.A2 = org.apache.log4j.RollingFileAppender
log4j.appender.A2.layout = org.apache.log4j.PatternLayout
log4j.appender.A2.layout.ConversionPattern = [%d{yyyyMMdd-HH:mm}][%p][%C-%L] %m%n
log4j.appender.A2.File = /opt/app/logs/solrJava.log
# note: DatePattern is only honored by DailyRollingFileAppender, hence the log4j WARN in the run output below
log4j.appender.A2.DatePattern = '.'yyyyMMdd-HH:mm
log4j.appender.A2.MaxFileSize=10MB
. build.gradle is as follows:
/* Apply the java plugin to get the tasks for compiling a Java project */
apply plugin: 'java'
/* Apply the application plugin to get the tasks for running a Java project */
apply plugin: 'application'
/* Parameter required by the application plugin */
mainClassName = "com.handler.SolrHandler"
/* Configure the Maven repository server */
repositories {
    mavenCentral()
}
/* Declare the project's dependencies */
dependencies {
    compile group: 'commons-logging', name: 'commons-logging', version: '1.1.1'
    compile group: 'log4j', name: 'log4j', version: '1.2.16'
}
. Use the gradle command to execute the run task.
The result is as follows:
C:\Users\1409035\Documents\GitHub\SolrJava> gradle run
:compileJava UP-TO-DATE
:processResources UP-TO-DATE
:classes UP-TO-DATE
:run
log4j:WARN No such property [datePattern] in org.apache.log4j.RollingFileAppender.
[solrJava.log.20150504-20:25][INFO][com.handler.SolrHandler-11] Hello World
BUILD SUCCESSFUL
. After building, the following is generated in the project folder:
C:.
├─.gradle
│  └─2.3
│     └─taskArtifacts
├─build
│  ├─classes
│  │  └─main
│  │     └─com
│  │        └─handler
│  ├─dependency-cache
│  ├─resources
│  │  └─main
│  └─tmp
│     └─compileJava
└─src
   └─main
      ├─java
      │  └─com
      │     └─handler
      └─resources