Initial commit - cleaned repository

This commit is contained in:
jungwoo choi
2025-09-28 20:41:57 +09:00
commit e3c28f796a
188 changed files with 28102 additions and 0 deletions

View File

@ -0,0 +1,105 @@
<?xml version="1.0" encoding="UTF-8"?>
<schema name="site11" version="1.6">
<!-- Field Types -->
<!-- Primitive field types.
     The Point-based numeric and date types enable docValues: Solr cannot sort
     on a Point field without them (for example sort=created_at desc would be
     rejected), and faceting / function queries on these fields rely on them as
     well. Documents indexed before this change must be reindexed. -->
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="int" class="solr.IntPointField" docValues="true" omitNorms="true"/>
<fieldType name="long" class="solr.LongPointField" docValues="true" omitNorms="true"/>
<fieldType name="float" class="solr.FloatPointField" docValues="true" omitNorms="true"/>
<fieldType name="double" class="solr.DoublePointField" docValues="true" omitNorms="true"/>
<fieldType name="date" class="solr.DatePointField" docValues="true" omitNorms="true"/>
<!-- Text field with analysis -->
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
  <!-- Index time: tokenize, drop stop words, lowercase, then emit edge n-grams
       so that prefixes of each word are searchable. -->
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- preserveOriginal="true" also indexes the untouched token. Without it,
         tokens shorter than minGramSize (single characters) are dropped and
         tokens longer than maxGramSize are indexed only as a 15-character
         prefix, so a query for the full word could never match them. -->
    <filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="15" preserveOriginal="true"/>
  </analyzer>
  <!-- Query time: no n-grams; the typed term is matched against the indexed
       grams. Synonyms from synonyms.txt are expanded here only. -->
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Text field for exact matching -->
<fieldType name="text_exact" class="solr.TextField">
  <!-- One analyzer for both indexing and querying: the keyword tokenizer keeps
       the whole value as a single token, which is then lowercased, giving
       case-insensitive whole-value matching. -->
  <analyzer>
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Autocomplete/Suggest field -->
<fieldType name="text_suggest" class="solr.TextField" positionIncrementGap="100">
  <!-- Index time: lowercase and expand every token into its edge n-grams.
       preserveOriginal="true" keeps tokens longer than maxGramSize intact
       instead of truncating them to a 20-character prefix. -->
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="20" preserveOriginal="true"/>
  </analyzer>
  <!-- Query time: deliberately no n-gram filter. If the typed prefix were
       n-grammed as well, "jav" would also be searched as "j" and "ja" and
       would match nearly every entry. -->
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Fields -->
<!-- Mandatory document key, plus the _version_ field kept as a long -->
<field name="id"         type="string" indexed="true" stored="true" required="true"/>
<field name="_version_"  type="long"   indexed="true" stored="true"/>
<!-- Discriminator for the kind of document, and the time it was indexed -->
<field name="doc_type"   type="string" indexed="true" stored="true" docValues="true"/>
<field name="indexed_at" type="date"   indexed="true" stored="true"/>
<!-- Common fields across document types -->
<!-- Full-text fields; title and content additionally keep term vectors -->
<field name="title"       type="text_general" indexed="true" stored="true" termVectors="true"/>
<field name="content"     type="text_general" indexed="true" stored="true" termVectors="true"/>
<field name="description" type="text_general" indexed="true" stored="true"/>
<field name="summary"     type="text_general" indexed="true" stored="true"/>
<!-- Exact-value string fields with docValues; tags may hold several values -->
<field name="tags"        type="string"       indexed="true" stored="true" multiValued="true" docValues="true"/>
<field name="category"    type="string"       indexed="true" stored="true" docValues="true"/>
<field name="status"      type="string"       indexed="true" stored="true" docValues="true"/>
<!-- User-specific fields -->
<!-- username and email use text_exact (whole value, lowercased);
     name and bio are analyzed as general text -->
<field name="user_id"  type="string"       indexed="true" stored="true"/>
<field name="username" type="text_exact"   indexed="true" stored="true"/>
<field name="email"    type="text_exact"   indexed="true" stored="true"/>
<field name="name"     type="text_general" indexed="true" stored="true"/>
<field name="bio"      type="text_general" indexed="true" stored="true"/>
<!-- File-specific fields -->
<!-- File names are full-text searchable; content_type is an exact string
     with docValues; size is stored as a long -->
<field name="file_id"       type="string"       indexed="true" stored="true"/>
<field name="filename"      type="text_general" indexed="true" stored="true"/>
<field name="original_name" type="text_general" indexed="true" stored="true"/>
<field name="content_type"  type="string"       indexed="true" stored="true" docValues="true"/>
<field name="size"          type="long"         indexed="true" stored="true"/>
<!-- Content-specific fields -->
<!-- Identifiers on content documents (exact-match strings) -->
<field name="content_id" type="string" indexed="true" stored="true"/>
<field name="author_id"  type="string" indexed="true" stored="true"/>
<!-- Creation and last-modification timestamps -->
<field name="created_at" type="date"   indexed="true" stored="true"/>
<field name="updated_at" type="date"   indexed="true" stored="true"/>
<!-- Suggest field for autocomplete -->
<!-- Autocomplete source field. It must be stored: the suggester configured in
     solrconfig.xml uses DocumentDictionaryFactory, which builds its dictionary
     from the stored values of this field. With stored="false" the suggester
     would build an empty dictionary. Reindex after changing this. -->
<field name="suggest" type="text_suggest" indexed="true" stored="true" multiValued="true"/>
<!-- Values copied into the suggest field at index time -->
<copyField source="title" dest="suggest"/>
<copyField source="name" dest="suggest"/>
<copyField source="filename" dest="suggest"/>
<copyField source="tags" dest="suggest"/>
<!-- Dynamic fields -->
<!-- Suffix-typed catch-all fields: the suffix of the field name selects the type -->
<dynamicField name="*_i"  type="int"          indexed="true" stored="true"/>
<dynamicField name="*_l"  type="long"         indexed="true" stored="true"/>
<dynamicField name="*_f"  type="float"        indexed="true" stored="true"/>
<dynamicField name="*_d"  type="double"       indexed="true" stored="true"/>
<dynamicField name="*_s"  type="string"       indexed="true" stored="true"/>
<dynamicField name="*_t"  type="text_general" indexed="true" stored="true"/>
<dynamicField name="*_dt" type="date"         indexed="true" stored="true"/>
<dynamicField name="*_b"  type="boolean"      indexed="true" stored="true"/>
<!-- The id field identifies each document uniquely -->
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -0,0 +1,152 @@
<?xml version="1.0" encoding="UTF-8" ?>
<config>
<luceneMatchVersion>9.4.0</luceneMatchVersion>
<!-- Index data location, taken from the solr.data.dir property (empty default) -->
<dataDir>${solr.data.dir:}</dataDir>
<!-- Indexing buffers and segment merge policy -->
<indexConfig>
  <ramBufferSizeMB>100</ramBufferSizeMB>
  <maxBufferedDocs>1000</maxBufferedDocs>
  <mergePolicyFactory class="org.apache.solr.index.TieredMergePolicyFactory">
    <int name="maxMergeAtOnce">10</int>
    <int name="segmentsPerTier">10</int>
  </mergePolicyFactory>
</indexConfig>
<!-- Update Handler -->
<updateHandler class="solr.DirectUpdateHandler2">
  <!-- Transaction log; directory and bucket count can be overridden by properties -->
  <updateLog>
    <str name="dir">${solr.ulog.dir:}</str>
    <int name="numVersionBuckets">${solr.ulog.numVersionBuckets:65536}</int>
  </updateLog>
  <!-- Hard commit: every 15 s by default, without opening a new searcher -->
  <autoCommit>
    <maxTime>${solr.autoCommit.maxTime:15000}</maxTime>
    <openSearcher>false</openSearcher>
  </autoCommit>
  <!-- Soft commit: every 1 s by default -->
  <autoSoftCommit>
    <maxTime>${solr.autoSoftCommit.maxTime:1000}</maxTime>
  </autoSoftCommit>
</updateHandler>
<!-- Query Settings -->
<query>
  <maxBooleanClauses>1024</maxBooleanClauses>
  <!-- Three Caffeine-backed caches of 512 entries each, with autowarming off -->
  <filterCache      class="solr.CaffeineCache" size="512" initialSize="512" autowarmCount="0"/>
  <queryResultCache class="solr.CaffeineCache" size="512" initialSize="512" autowarmCount="0"/>
  <documentCache    class="solr.CaffeineCache" size="512" initialSize="512" autowarmCount="0"/>
  <enableLazyFieldLoading>true</enableLazyFieldLoading>
  <!-- Result window and cap on cached documents per query result -->
  <queryResultWindowSize>20</queryResultWindowSize>
  <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
</query>
<!-- Request Dispatcher -->
<requestDispatcher>
  <!-- Remote streaming is disabled. When enabled, request parameters such as
       stream.url and stream.file make Solr fetch arbitrary URLs or read local
       files on behalf of the caller, which exposes the server to SSRF and file
       disclosure. Upload limits: multipart 2048000 KB, form data 2048 KB. -->
  <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048000"
                  formdataUploadLimitInKB="2048" addHttpRequestToContext="false"/>
  <httpCaching never304="true"/>
</requestDispatcher>
<!-- Request Handlers -->
<!-- Standard search handler -->
<!-- Main search endpoint. Everything below is a default that a request may override. -->
<requestHandler name="/select" class="solr.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
<int name="rows">10</int>
<str name="df">content</str>
<str name="q.op">OR</str>
<!-- Queries are parsed with edismax; qf lists the searched fields with their
boosts (fields without a boost use the default weight) -->
<str name="defType">edismax</str>
<str name="qf">
title^3.0 name^2.5 content^2.0 description^1.5 summary^1.5
filename^1.5 tags^1.2 category username email bio
</str>
<!-- pf: phrase-match boosts applied on top of qf -->
<str name="pf">
title^4.0 name^3.0 content^2.5 description^2.0
</str>
<!-- mm (minimum should match, edismax syntax): with one or two optional
clauses all are required; with more than two, up to 25% may be missing -->
<str name="mm">2&lt;-25%</str>
<!-- Highlighting is on by default and wraps matches in mark tags -->
<str name="hl">true</str>
<str name="hl.fl">title,content,description,summary</str>
<str name="hl.simple.pre">&lt;mark&gt;</str>
<str name="hl.simple.post">&lt;/mark&gt;</str>
<!-- Faceting is on by default, but no facet.field is set here, so a request
has to supply the fields to facet on -->
<str name="facet">true</str>
<str name="facet.mincount">1</str>
</lst>
</requestHandler>
<!-- Update handler -->
<requestHandler name="/update" class="solr.UpdateRequestHandler"/>
<!-- Real-time get by id; the response header is omitted by default -->
<requestHandler name="/get" class="solr.RealTimeGetHandler">
  <lst name="defaults">
    <str name="omitHeader">true</str>
  </lst>
</requestHandler>
<!-- Admin handlers -->
<requestHandler name="/admin/ping" class="solr.PingRequestHandler">
  <!-- The ping query is fixed (invariant), so callers cannot replace it -->
  <lst name="invariants">
    <str name="q">solrpingquery</str>
  </lst>
  <lst name="defaults">
    <str name="echoParams">all</str>
  </lst>
</requestHandler>
<!-- Suggest/Autocomplete handler -->
<requestHandler name="/suggest" class="solr.SearchHandler">
  <!-- Runs only the "suggest" search component, using the dictionary named
       "suggest" and returning up to 10 suggestions by default -->
  <lst name="defaults">
    <str name="suggest">true</str>
    <str name="suggest.count">10</str>
    <str name="suggest.dictionary">suggest</str>
  </lst>
  <arr name="components">
    <str>suggest</str>
  </arr>
</requestHandler>
<!-- Spell check component -->
<!-- NOTE(review): no request handler in this file lists this component, so it
     only takes effect if it is wired into a handler elsewhere. -->
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
  <str name="queryAnalyzerFieldType">text_general</str>
  <!-- Index-based checker that reads its terms from the content field -->
  <lst name="spellchecker">
    <str name="name">default</str>
    <str name="field">content</str>
    <str name="classname">solr.DirectSolrSpellChecker</str>
    <str name="distanceMeasure">internal</str>
    <float name="accuracy">0.5</float>
    <int name="maxEdits">2</int>
    <int name="minPrefix">1</int>
    <int name="maxInspections">5</int>
    <int name="minQueryLength">4</int>
    <float name="maxQueryFrequency">0.01</float>
  </lst>
</searchComponent>
<!-- Suggest component -->
<searchComponent name="suggest" class="solr.SuggestComponent">
  <!-- Fuzzy suggester fed from the documents' "suggest" field and analyzed
       with the text_suggest field type. It is not built on startup, so the
       dictionary has to be built explicitly. -->
  <lst name="suggester">
    <str name="name">suggest</str>
    <str name="lookupImpl">FuzzyLookupFactory</str>
    <str name="dictionaryImpl">DocumentDictionaryFactory</str>
    <str name="field">suggest</str>
    <str name="suggestAnalyzerFieldType">text_suggest</str>
    <str name="buildOnStartup">false</str>
  </lst>
</searchComponent>
<!-- More Like This handler -->
<requestHandler name="/mlt" class="solr.MoreLikeThisHandler">
  <!-- Similarity is computed over title, content, description and tags;
       terms count from a single occurrence, and 10 similar documents are
       returned by default -->
  <lst name="defaults">
    <str name="mlt.fl">title,content,description,tags</str>
    <int name="mlt.mindf">1</int>
    <int name="mlt.mintf">1</int>
    <int name="mlt.count">10</int>
  </lst>
</requestHandler>
<!-- Schema handler (removed for Solr 9.x compatibility) -->
<!-- Config handler (removed for Solr 9.x compatibility) -->
</config>

View File

@ -0,0 +1,35 @@
# Licensed to the Apache Software Foundation (ASF)
# Standard English stop words
a
an
and
are
as
at
be
but
by
for
if
in
into
is
it
no
not
of
on
or
such
that
the
their
then
there
these
they
this
to
was
will
with

View File

@ -0,0 +1,38 @@
# Synonyms for site11 search
# Format: term1, term2, term3  (comma-separated terms are all equivalent synonyms)
# Or:     term1 => term2       (explicit mapping: term1 is replaced by term2)
# Technology synonyms
javascript, js
typescript, ts
python, py
golang, go
database, db
kubernetes, k8s
docker, container, containerization
# Common terms
search, find, query, lookup
upload, import, add
download, export, get
delete, remove, erase
update, modify, edit, change
create, make, new, add
# File related
document, doc, file
image, picture, photo, img
video, movie, clip
audio, sound, music
# User related
user, member, account
admin, administrator, moderator
profile, account, user
# Status
active, enabled, live
inactive, disabled, offline
pending, waiting, processing
complete, done, finished
error, failed, failure